1use crate::fine::{NumericVec, PosExt, ShaderResultF32};
9use crate::kurbo::{Point, Vec2};
10use vello_common::encode::EncodedBlurredRoundedRectangle;
11use vello_common::fearless_simd::{Simd, SimdBase, SimdFloat, f32x8, u8x16};
12
13#[cfg(not(feature = "std"))]
14use vello_common::kurbo::common::FloatFuncs as _;
15
/// Painter state for an encoded blurred rounded rectangle.
///
/// The four color fields hold one premultiplied color channel each, broadcast
/// to every lane. Each step multiplies them by the per-lane factor produced by
/// `alpha_calculator`.
#[derive(Debug)]
pub(crate) struct BlurredRoundedRectFiller<S: Simd> {
    /// Premultiplied red channel of the fill color, identical in all lanes.
    r: f32x8<S>,
    /// Premultiplied green channel of the fill color, identical in all lanes.
    g: f32x8<S>,
    /// Premultiplied blue channel of the fill color, identical in all lanes.
    b: f32x8<S>,
    /// Premultiplied alpha channel of the fill color, identical in all lanes.
    a: f32x8<S>,
    /// Source of the per-lane factor applied to every channel.
    alpha_calculator: AlphaCalculator<S>,
}
24
25impl<S: Simd> BlurredRoundedRectFiller<S> {
26 pub(crate) fn new(
27 simd: S,
28 rect: &EncodedBlurredRoundedRectangle,
29 start_x: u16,
30 start_y: u16,
31 ) -> Self {
32 let start_pos = rect.transform * Point::new(f64::from(start_x), f64::from(start_y));
33 let color_components = rect.color.as_premul_f32().components;
34 let r = f32x8::splat(simd, color_components[0]);
35 let g = f32x8::splat(simd, color_components[1]);
36 let b = f32x8::splat(simd, color_components[2]);
37 let a = f32x8::splat(simd, color_components[3]);
38 let simd_rect = SimdRoundedBlurredRect::new(rect, simd);
39 let alpha_calculator =
40 AlphaCalculator::new(start_pos, rect.x_advance, rect.y_advance, simd_rect, simd);
41
42 Self {
43 alpha_calculator,
44 r,
45 g,
46 b,
47 a,
48 }
49 }
50}
51
52impl<S: Simd> Iterator for BlurredRoundedRectFiller<S> {
53 type Item = ShaderResultF32<S>;
54
55 fn next(&mut self) -> Option<Self::Item> {
56 let next = self.alpha_calculator.next().unwrap();
57 let r = self.r * next;
58 let g = self.g * next;
59 let b = self.b * next;
60 let a = self.a * next;
61
62 Some(ShaderResultF32 { r, g, b, a })
63 }
64}
65
66impl<S: Simd> crate::fine::Painter for BlurredRoundedRectFiller<S> {
67 fn paint_u8(&mut self, buf: &mut [u8]) {
68 for chunk in buf.chunks_exact_mut(64) {
69 let first = self.next().unwrap();
70 let simd = first.r.simd;
71 let second = self.next().unwrap();
72
73 let r = u8x16::from_f32(simd, simd.combine_f32x8(first.r, second.r));
74 let g = u8x16::from_f32(simd, simd.combine_f32x8(first.g, second.g));
75 let b = u8x16::from_f32(simd, simd.combine_f32x8(first.b, second.b));
76 let a = u8x16::from_f32(simd, simd.combine_f32x8(first.a, second.a));
77
78 let combined = simd.combine_u8x32(simd.combine_u8x16(r, g), simd.combine_u8x16(b, a));
79
80 simd.store_interleaved_128_u8x64(combined, (&mut chunk[..]).try_into().unwrap());
81 }
82 }
83
84 fn paint_f32(&mut self, buf: &mut [f32]) {
85 for chunk in buf.chunks_exact_mut(32) {
86 let (c1, c2) = self.next().unwrap().get();
87 c1.simd
88 .store_interleaved_128_f32x16(c1, (&mut chunk[..16]).try_into().unwrap());
89 c2.simd
90 .store_interleaved_128_f32x16(c2, (&mut chunk[16..]).try_into().unwrap());
91 }
92 }
93}
94
/// Iterator state that evaluates the blurred rounded rectangle for eight
/// lanes at a time.
#[derive(Debug)]
struct AlphaCalculator<S: Simd> {
    /// Position used for the next batch; moved by `2.0 * x_advance` after
    /// every call to `next`.
    cur_pos: Point,
    /// Advance vector passed to `splat_pos` as the second step argument.
    // NOTE(review): presumably the per-pixel step along x — confirm against `PosExt::splat_pos`.
    x_advance: Vec2,
    /// Advance vector passed to `splat_pos` as the third step argument.
    // NOTE(review): presumably the per-pixel step along y — confirm against `PosExt::splat_pos`.
    y_advance: Vec2,
    /// Broadcast rectangle parameters used by the per-lane computation.
    r: SimdRoundedBlurredRect<S>,
    /// SIMD token used to build vectors.
    simd: S,
}
103
104impl<S: Simd> AlphaCalculator<S> {
105 fn new(
106 start_pos: Point,
107 x_advance: Vec2,
108 y_advance: Vec2,
109 r: SimdRoundedBlurredRect<S>,
110 simd: S,
111 ) -> Self {
112 Self {
113 cur_pos: start_pos,
114 x_advance,
115 y_advance,
116 r,
117 simd,
118 }
119 }
120}
121
122impl<S: Simd> Iterator for AlphaCalculator<S> {
123 type Item = f32x8<S>;
124
125 fn next(&mut self) -> Option<Self::Item> {
126 let i = f32x8::splat_pos(
127 self.simd,
128 self.cur_pos.x as f32,
129 self.x_advance.x as f32,
130 self.y_advance.x as f32,
131 );
132 let j = f32x8::splat_pos(
133 self.simd,
134 self.cur_pos.y as f32,
135 self.x_advance.y as f32,
136 self.y_advance.y as f32,
137 );
138 let r = &self.r;
139
140 let y = j - r.v1.msub(r.height, r.v1);
142 let y0 = r.r1 - r.h.msub(r.v1, y.abs());
144 let y1 = y0.max(r.v0);
145
146 let x = i - r.v1.msub(r.width, r.v1);
148 let x0 = r.r1 - r.w.msub(r.v1, x.abs());
150 let x1 = x0.max(r.v0);
151 let d_pos = (x1.powf(r.exponent) + y1.powf(r.exponent)).powf(r.recip_exponent);
152 let d_neg = x0.max(y0).min(r.v0);
153 let d = d_pos + d_neg - r.r1;
154 let z = r.scale
155 * (f32x8::compute_erf7(self.simd, r.std_dev_inv * (r.min_edge + d))
156 - f32x8::compute_erf7(self.simd, r.std_dev_inv * d));
157
158 self.cur_pos += 2.0 * self.x_advance;
159
160 Some(z)
161 }
162}
163
/// Parameters of an encoded blurred rounded rectangle, prepared for SIMD use.
///
/// Every vector field is a broadcast of the same-named scalar on the encoded
/// rectangle, except `v0` and `v1`, which are constants. The two exponents
/// stay scalar because they are passed to the lane-wise `powf` as `f32`.
#[derive(Debug)]
struct SimdRoundedBlurredRect<S: Simd> {
    /// Scalar copy of the encoded `exponent`.
    pub exponent: f32,
    /// Scalar copy of the encoded `recip_exponent`.
    pub recip_exponent: f32,
    /// Broadcast of the encoded `scale`.
    pub scale: f32x8<S>,
    /// Broadcast of the encoded `std_dev_inv`.
    pub std_dev_inv: f32x8<S>,
    /// Broadcast of the encoded `min_edge`.
    pub min_edge: f32x8<S>,
    /// Broadcast of the encoded `w`.
    pub w: f32x8<S>,
    /// Broadcast of the encoded `h`.
    pub h: f32x8<S>,
    /// Broadcast of the encoded `width`.
    pub width: f32x8<S>,
    /// Broadcast of the encoded `height`.
    pub height: f32x8<S>,
    /// Broadcast of the encoded `r1`.
    pub r1: f32x8<S>,
    /// Constant 0.0 in every lane.
    pub v0: f32x8<S>,
    /// Constant 0.5 in every lane.
    pub v1: f32x8<S>,
}
179
180impl<S: Simd> SimdRoundedBlurredRect<S> {
181 fn new(encoded: &EncodedBlurredRoundedRectangle, s: S) -> Self {
182 let h = f32x8::splat(s, encoded.h);
183 let w = f32x8::splat(s, encoded.w);
184 let width = f32x8::splat(s, encoded.width);
185 let height = f32x8::splat(s, encoded.height);
186 let r1 = f32x8::splat(s, encoded.r1);
187 let exponent = encoded.exponent;
188 let recip_exponent = encoded.recip_exponent;
189 let scale = f32x8::splat(s, encoded.scale);
190 let min_edge = f32x8::splat(s, encoded.min_edge);
191 let std_dev_inv = f32x8::splat(s, encoded.std_dev_inv);
192 let v0 = f32x8::splat(s, 0.0);
193 let v1 = f32x8::splat(s, 0.5);
194
195 Self {
196 exponent,
197 recip_exponent,
198 scale,
199 std_dev_inv,
200 min_edge,
201 w,
202 v0,
203 v1,
204 h,
205 width,
206 height,
207 r1,
208 }
209 }
210}
211
/// Extra lane-wise float operations needed by the blurred rounded rectangle
/// computation.
trait FloatExt<S: Simd> {
    /// Evaluates a rational polynomial of `x` per lane.
    // NOTE(review): judging by the name this approximates the error function `erf` — confirm the intended accuracy.
    fn compute_erf7(simd: S, x: Self) -> Self;
    /// Raises every lane of `self` to the scalar power `x`.
    fn powf(self, x: f32) -> Self;
}
220
221impl<S: Simd> FloatExt<S> for f32x8<S> {
222 fn compute_erf7(simd: S, x: Self) -> Self {
223 let x = x * Self::splat(simd, core::f32::consts::FRAC_2_SQRT_PI);
224 let xx = x * x;
225 let p1 = Self::splat(simd, 0.0104).madd(xx, Self::splat(simd, 0.03395));
226 let p2 = p1.madd(xx, Self::splat(simd, 0.24295));
227 let p3 = x * xx;
228 let x = p2.madd(p3, x);
229 let denom = x.madd(x, Self::splat(simd, 1.0)).sqrt();
230 x / denom
231 }
232
233 #[inline]
234 fn powf(mut self, x: f32) -> Self {
235 self.val[0] = self.val[0].powf(x);
237 self.val[1] = self.val[1].powf(x);
238 self.val[2] = self.val[2].powf(x);
239 self.val[3] = self.val[3].powf(x);
240 self.val[4] = self.val[4].powf(x);
241 self.val[5] = self.val[5].powf(x);
242 self.val[6] = self.val[6].powf(x);
243 self.val[7] = self.val[7].powf(x);
244
245 self
246 }
247}