vello_cpu/fine/common/gradient/
mod.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::fine::{NumericVec, PosExt, ShaderResultF32};
5use crate::kurbo::Point;
6use crate::peniko;
7use core::slice::ChunksExact;
8use vello_common::encode::{EncodedGradient, GradientLut};
9use vello_common::fearless_simd::*;
10
11pub(crate) mod linear;
12pub(crate) mod radial;
13pub(crate) mod sweep;
14
15pub(crate) fn calculate_t_vals<S: Simd, U: SimdGradientKind<S>>(
16    simd: S,
17    kind: U,
18    buf: &mut [f32],
19    gradient: &EncodedGradient,
20    start_x: u16,
21    start_y: u16,
22) {
23    simd.vectorize(
24        #[inline(always)]
25        || {
26            let mut cur_pos =
27                gradient.transform * Point::new(f64::from(start_x), f64::from(start_y));
28            let x_advances = (gradient.x_advance.x as f32, gradient.x_advance.y as f32);
29            let y_advances = (gradient.y_advance.x as f32, gradient.y_advance.y as f32);
30
31            for buf_part in buf.chunks_exact_mut(8) {
32                let x_pos = f32x8::splat_pos(simd, cur_pos.x as f32, x_advances.0, y_advances.0);
33                let y_pos = f32x8::splat_pos(simd, cur_pos.y as f32, x_advances.1, y_advances.1);
34                let pos = kind.cur_pos(x_pos, y_pos);
35                buf_part.copy_from_slice(&pos.val);
36
37                cur_pos += 2.0 * gradient.x_advance;
38            }
39        },
40    );
41}
42
43#[derive(Debug)]
44pub(crate) struct GradientPainter<'a, S: Simd> {
45    gradient: &'a EncodedGradient,
46    lut: &'a GradientLut<f32>,
47    t_vals: ChunksExact<'a, f32>,
48    has_undefined: bool,
49    scale_factor: f32x8<S>,
50    simd: S,
51}
52
53impl<'a, S: Simd> GradientPainter<'a, S> {
54    pub(crate) fn new(
55        simd: S,
56        gradient: &'a EncodedGradient,
57        has_undefined: bool,
58        t_vals: &'a [f32],
59    ) -> Self {
60        let lut = gradient.f32_lut(simd);
61        let scale_factor = f32x8::splat(simd, lut.scale_factor());
62
63        Self {
64            gradient,
65            scale_factor,
66            has_undefined,
67            lut,
68            t_vals: t_vals.chunks_exact(8),
69            simd,
70        }
71    }
72}
73
74impl<S: Simd> Iterator for GradientPainter<'_, S> {
75    type Item = ShaderResultF32<S>;
76
77    #[inline(always)]
78    fn next(&mut self) -> Option<Self::Item> {
79        let extend = self.gradient.extend;
80        let pos = f32x8::from_slice(self.simd, self.t_vals.next()?);
81        let t_vals = apply_extend(pos, extend);
82
83        let indices = {
84            // Clear NaNs.
85            let cleared_t_vals = self.simd.select_f32x8(
86                t_vals.simd_eq(t_vals),
87                t_vals,
88                f32x8::splat(self.simd, 0.0),
89            );
90
91            (cleared_t_vals * self.scale_factor).cvt_u32()
92        };
93
94        let mut r = [0.0_f32; 8];
95        let mut g = [0.0_f32; 8];
96        let mut b = [0.0_f32; 8];
97        let mut a = [0.0_f32; 8];
98
99        // TODO: Investigate whether we can use a loop without performance hit.
100        macro_rules! gather {
101            ($idx:expr) => {
102                let sample = self.lut.get(indices[$idx] as usize);
103                r[$idx] = sample[0];
104                g[$idx] = sample[1];
105                b[$idx] = sample[2];
106                a[$idx] = sample[3];
107            };
108        }
109
110        gather!(0);
111        gather!(1);
112        gather!(2);
113        gather!(3);
114        gather!(4);
115        gather!(5);
116        gather!(6);
117        gather!(7);
118
119        let mut r = f32x8::from_slice(self.simd, &r);
120        let mut g = f32x8::from_slice(self.simd, &g);
121        let mut b = f32x8::from_slice(self.simd, &b);
122        let mut a = f32x8::from_slice(self.simd, &a);
123
124        if self.has_undefined {
125            macro_rules! mask_nan {
126                ($channel:expr) => {
127                    $channel = self.simd.select_f32x8(
128                        // On some architectures, the NaNs of `t_vals` might have been cleared already by
129                        // the `extend` function, so use the original variable as the mask.
130                        // Mask out NaNs with 0.
131                        self.simd.simd_eq_f32x8(pos, pos),
132                        $channel,
133                        f32x8::splat(self.simd, 0.0),
134                    );
135                };
136            }
137
138            mask_nan!(r);
139            mask_nan!(g);
140            mask_nan!(b);
141            mask_nan!(a);
142        }
143
144        Some(ShaderResultF32 { r, g, b, a })
145    }
146}
147
148impl<S: Simd> crate::fine::Painter for GradientPainter<'_, S> {
149    fn paint_u8(&mut self, buf: &mut [u8]) {
150        for chunk in buf.chunks_exact_mut(64) {
151            let first = self.next().unwrap();
152            let simd = first.r.simd;
153            let second = self.next().unwrap();
154
155            let r = u8x16::from_f32(simd, simd.combine_f32x8(first.r, second.r));
156            let g = u8x16::from_f32(simd, simd.combine_f32x8(first.g, second.g));
157            let b = u8x16::from_f32(simd, simd.combine_f32x8(first.b, second.b));
158            let a = u8x16::from_f32(simd, simd.combine_f32x8(first.a, second.a));
159
160            let combined = simd.combine_u8x32(simd.combine_u8x16(r, g), simd.combine_u8x16(b, a));
161
162            simd.store_interleaved_128_u8x64(combined, (&mut chunk[..]).try_into().unwrap());
163        }
164    }
165
166    fn paint_f32(&mut self, buf: &mut [f32]) {
167        for chunk in buf.chunks_exact_mut(32) {
168            let (c1, c2) = self.next().unwrap().get();
169            c1.simd
170                .store_interleaved_128_f32x16(c1, (&mut chunk[..16]).try_into().unwrap());
171            c2.simd
172                .store_interleaved_128_f32x16(c2, (&mut chunk[16..]).try_into().unwrap());
173        }
174    }
175}
176
177#[inline(always)]
178pub(crate) fn apply_extend<S: Simd>(val: f32x8<S>, extend: peniko::Extend) -> f32x8<S> {
179    match extend {
180        peniko::Extend::Pad => val.max(0.0).min(1.0),
181        peniko::Extend::Repeat => (val - val.floor()).fract(),
182        // See <https://github.com/google/skia/blob/220738774f7a0ce4a6c7bd17519a336e5e5dea5b/src/opts/SkRasterPipeline_opts.h#L6472-L6475>
183        peniko::Extend::Reflect => ((val - 1.0) - 2.0 * ((val - 1.0) * 0.5).floor() - 1.0)
184            .abs()
185            .max(0.0)
186            .min(1.0),
187    }
188}
189
190pub(crate) trait SimdGradientKind<S: Simd> {
191    fn cur_pos(&self, x_pos: f32x8<S>, y_pos: f32x8<S>) -> f32x8<S>;
192}