vello_cpu/fine/common/
rounded_blurred_rect.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Drawing blurred, rounded rectangles.
5//!
6//! Implementation is adapted from: <https://git.sr.ht/~raph/blurrr/tree/master/src/distfield.rs>.
7
8use crate::fine::{NumericVec, PosExt, ShaderResultF32};
9use crate::kurbo::{Point, Vec2};
10use vello_common::encode::EncodedBlurredRoundedRectangle;
11use vello_common::fearless_simd::{Simd, SimdBase, SimdFloat, f32x8, u8x16};
12
13#[cfg(not(feature = "std"))]
14use vello_common::kurbo::common::FloatFuncs as _;
15
16#[derive(Debug)]
17pub(crate) struct BlurredRoundedRectFiller<S: Simd> {
18    r: f32x8<S>,
19    g: f32x8<S>,
20    b: f32x8<S>,
21    a: f32x8<S>,
22    alpha_calculator: AlphaCalculator<S>,
23}
24
25impl<S: Simd> BlurredRoundedRectFiller<S> {
26    pub(crate) fn new(
27        simd: S,
28        rect: &EncodedBlurredRoundedRectangle,
29        start_x: u16,
30        start_y: u16,
31    ) -> Self {
32        let start_pos = rect.transform * Point::new(f64::from(start_x), f64::from(start_y));
33        let color_components = rect.color.as_premul_f32().components;
34        let r = f32x8::splat(simd, color_components[0]);
35        let g = f32x8::splat(simd, color_components[1]);
36        let b = f32x8::splat(simd, color_components[2]);
37        let a = f32x8::splat(simd, color_components[3]);
38        let simd_rect = SimdRoundedBlurredRect::new(rect, simd);
39        let alpha_calculator =
40            AlphaCalculator::new(start_pos, rect.x_advance, rect.y_advance, simd_rect, simd);
41
42        Self {
43            alpha_calculator,
44            r,
45            g,
46            b,
47            a,
48        }
49    }
50}
51
52impl<S: Simd> Iterator for BlurredRoundedRectFiller<S> {
53    type Item = ShaderResultF32<S>;
54
55    fn next(&mut self) -> Option<Self::Item> {
56        let next = self.alpha_calculator.next().unwrap();
57        let r = self.r * next;
58        let g = self.g * next;
59        let b = self.b * next;
60        let a = self.a * next;
61
62        Some(ShaderResultF32 { r, g, b, a })
63    }
64}
65
66impl<S: Simd> crate::fine::Painter for BlurredRoundedRectFiller<S> {
67    fn paint_u8(&mut self, buf: &mut [u8]) {
68        for chunk in buf.chunks_exact_mut(64) {
69            let first = self.next().unwrap();
70            let simd = first.r.simd;
71            let second = self.next().unwrap();
72
73            let r = u8x16::from_f32(simd, simd.combine_f32x8(first.r, second.r));
74            let g = u8x16::from_f32(simd, simd.combine_f32x8(first.g, second.g));
75            let b = u8x16::from_f32(simd, simd.combine_f32x8(first.b, second.b));
76            let a = u8x16::from_f32(simd, simd.combine_f32x8(first.a, second.a));
77
78            let combined = simd.combine_u8x32(simd.combine_u8x16(r, g), simd.combine_u8x16(b, a));
79
80            simd.store_interleaved_128_u8x64(combined, (&mut chunk[..]).try_into().unwrap());
81        }
82    }
83
84    fn paint_f32(&mut self, buf: &mut [f32]) {
85        for chunk in buf.chunks_exact_mut(32) {
86            let (c1, c2) = self.next().unwrap().get();
87            c1.simd
88                .store_interleaved_128_f32x16(c1, (&mut chunk[..16]).try_into().unwrap());
89            c2.simd
90                .store_interleaved_128_f32x16(c2, (&mut chunk[16..]).try_into().unwrap());
91        }
92    }
93}
94
95#[derive(Debug)]
96struct AlphaCalculator<S: Simd> {
97    cur_pos: Point,
98    x_advance: Vec2,
99    y_advance: Vec2,
100    r: SimdRoundedBlurredRect<S>,
101    simd: S,
102}
103
104impl<S: Simd> AlphaCalculator<S> {
105    fn new(
106        start_pos: Point,
107        x_advance: Vec2,
108        y_advance: Vec2,
109        r: SimdRoundedBlurredRect<S>,
110        simd: S,
111    ) -> Self {
112        Self {
113            cur_pos: start_pos,
114            x_advance,
115            y_advance,
116            r,
117            simd,
118        }
119    }
120}
121
122impl<S: Simd> Iterator for AlphaCalculator<S> {
123    type Item = f32x8<S>;
124
125    fn next(&mut self) -> Option<Self::Item> {
126        let i = f32x8::splat_pos(
127            self.simd,
128            self.cur_pos.x as f32,
129            self.x_advance.x as f32,
130            self.y_advance.x as f32,
131        );
132        let j = f32x8::splat_pos(
133            self.simd,
134            self.cur_pos.y as f32,
135            self.x_advance.y as f32,
136            self.y_advance.y as f32,
137        );
138        let r = &self.r;
139
140        // Equivalent to j + r.v1 - r.v1 * r.height
141        let y = j - r.v1.msub(r.height, r.v1);
142        // Equivalent to r.r1 + y.abs() - (r.h * r.v1)
143        let y0 = r.r1 - r.h.msub(r.v1, y.abs());
144        let y1 = y0.max(r.v0);
145
146        // Equivalent to i + r.v1 - r.v1 * r.width
147        let x = i - r.v1.msub(r.width, r.v1);
148        // Equivalent to r.r1 + x.abs() - (r.w * r.v1)
149        let x0 = r.r1 - r.w.msub(r.v1, x.abs());
150        let x1 = x0.max(r.v0);
151        let d_pos = (x1.powf(r.exponent) + y1.powf(r.exponent)).powf(r.recip_exponent);
152        let d_neg = x0.max(y0).min(r.v0);
153        let d = d_pos + d_neg - r.r1;
154        let z = r.scale
155            * (f32x8::compute_erf7(self.simd, r.std_dev_inv * (r.min_edge + d))
156                - f32x8::compute_erf7(self.simd, r.std_dev_inv * d));
157
158        self.cur_pos += 2.0 * self.x_advance;
159
160        Some(z)
161    }
162}
163
164#[derive(Debug)]
165struct SimdRoundedBlurredRect<S: Simd> {
166    pub exponent: f32,
167    pub recip_exponent: f32,
168    pub scale: f32x8<S>,
169    pub std_dev_inv: f32x8<S>,
170    pub min_edge: f32x8<S>,
171    pub w: f32x8<S>,
172    pub h: f32x8<S>,
173    pub width: f32x8<S>,
174    pub height: f32x8<S>,
175    pub r1: f32x8<S>,
176    pub v0: f32x8<S>,
177    pub v1: f32x8<S>,
178}
179
180impl<S: Simd> SimdRoundedBlurredRect<S> {
181    fn new(encoded: &EncodedBlurredRoundedRectangle, s: S) -> Self {
182        let h = f32x8::splat(s, encoded.h);
183        let w = f32x8::splat(s, encoded.w);
184        let width = f32x8::splat(s, encoded.width);
185        let height = f32x8::splat(s, encoded.height);
186        let r1 = f32x8::splat(s, encoded.r1);
187        let exponent = encoded.exponent;
188        let recip_exponent = encoded.recip_exponent;
189        let scale = f32x8::splat(s, encoded.scale);
190        let min_edge = f32x8::splat(s, encoded.min_edge);
191        let std_dev_inv = f32x8::splat(s, encoded.std_dev_inv);
192        let v0 = f32x8::splat(s, 0.0);
193        let v1 = f32x8::splat(s, 0.5);
194
195        Self {
196            exponent,
197            recip_exponent,
198            scale,
199            std_dev_inv,
200            min_edge,
201            w,
202            v0,
203            v1,
204            h,
205            width,
206            height,
207            r1,
208        }
209    }
210}
211
212trait FloatExt<S: Simd> {
213    // See https://raphlinus.github.io/audio/2018/09/05/sigmoid.html for a little
214    // explanation of this approximation to the erf function.
215    // Doing `inline(always)` seems to reduce performance for some reason.
216    /// Approximate the erf function.
217    fn compute_erf7(simd: S, x: Self) -> Self;
218    fn powf(self, x: f32) -> Self;
219}
220
221impl<S: Simd> FloatExt<S> for f32x8<S> {
222    fn compute_erf7(simd: S, x: Self) -> Self {
223        let x = x * Self::splat(simd, core::f32::consts::FRAC_2_SQRT_PI);
224        let xx = x * x;
225        let p1 = Self::splat(simd, 0.0104).madd(xx, Self::splat(simd, 0.03395));
226        let p2 = p1.madd(xx, Self::splat(simd, 0.24295));
227        let p3 = x * xx;
228        let x = p2.madd(p3, x);
229        let denom = x.madd(x, Self::splat(simd, 1.0)).sqrt();
230        x / denom
231    }
232
233    #[inline]
234    fn powf(mut self, x: f32) -> Self {
235        // TODO: SIMD
236        self.val[0] = self.val[0].powf(x);
237        self.val[1] = self.val[1].powf(x);
238        self.val[2] = self.val[2].powf(x);
239        self.val[3] = self.val[3].powf(x);
240        self.val[4] = self.val[4].powf(x);
241        self.val[5] = self.val[5].powf(x);
242        self.val[6] = self.val[6].powf(x);
243        self.val[7] = self.val[7].powf(x);
244
245        self
246    }
247}