Skip to main content

vello_cpu/fine/common/gradient/
mod.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::fine::{NumericVec, PosExt};
5use crate::kurbo::Point;
6use crate::peniko;
7use core::slice::ChunksExact;
8use vello_common::encode::{EncodedGradient, GradientLut};
9use vello_common::fearless_simd::*;
10
11pub(crate) mod linear;
12pub(crate) mod radial;
13pub(crate) mod sweep;
14
15pub(crate) fn calculate_t_vals<S: Simd, U: SimdGradientKind<S>>(
16    simd: S,
17    kind: U,
18    buf: &mut [f32],
19    gradient: &EncodedGradient,
20    start_x: u16,
21    start_y: u16,
22) {
23    simd.vectorize(
24        #[inline(always)]
25        || {
26            let mut cur_pos =
27                gradient.transform * Point::new(f64::from(start_x), f64::from(start_y));
28            let x_advances = (gradient.x_advance.x as f32, gradient.x_advance.y as f32);
29            let y_advances = (gradient.y_advance.x as f32, gradient.y_advance.y as f32);
30
31            for buf_part in buf.chunks_exact_mut(8) {
32                let x_pos = f32x8::splat_pos(simd, cur_pos.x as f32, x_advances.0, y_advances.0);
33                let y_pos = f32x8::splat_pos(simd, cur_pos.y as f32, x_advances.1, y_advances.1);
34                let pos = kind.cur_pos(x_pos, y_pos);
35                buf_part.copy_from_slice(pos.as_slice());
36
37                cur_pos += 2.0 * gradient.x_advance;
38            }
39        },
40    );
41}
42
/// Turns a slice of precomputed gradient `t` values into LUT indices (via its `Iterator`
/// impl) and into pixel data (via its `Painter` impl).
#[derive(Debug)]
pub(crate) struct GradientPainter<'a, S: Simd> {
    /// The encoded gradient being painted; its `extend` mode is read on every `next` call.
    gradient: &'a EncodedGradient,
    /// The `f32` lookup table obtained from `gradient.f32_lut`.
    lut: &'a GradientLut<f32>,
    /// The remaining `t` values, consumed in chunks of eight.
    t_vals: ChunksExact<'a, f32>,
    /// `lut.scale_factor()` broadcast to all eight lanes; multiplied with the extended `t`
    /// values to obtain LUT indices.
    scale_factor: f32x8<S>,
    /// The SIMD token used for all vector operations.
    simd: S,
}
51
52impl<'a, S: Simd> GradientPainter<'a, S> {
53    pub(crate) fn new(simd: S, gradient: &'a EncodedGradient, t_vals: &'a [f32]) -> Self {
54        let lut = gradient.f32_lut(simd);
55        let scale_factor = f32x8::splat(simd, lut.scale_factor());
56
57        Self {
58            gradient,
59            scale_factor,
60            lut,
61            t_vals: t_vals.chunks_exact(8),
62            simd,
63        }
64    }
65}
66
67impl<S: Simd> Iterator for GradientPainter<'_, S> {
68    type Item = u32x8<S>;
69
70    #[inline(always)]
71    fn next(&mut self) -> Option<Self::Item> {
72        let extend = self.gradient.extend;
73        let pos = f32x8::from_slice(self.simd, self.t_vals.next()?);
74        let t_vals = apply_extend(pos, extend);
75
76        let indices = (t_vals * self.scale_factor).to_int::<u32x8<S>>();
77
78        // Clear NaNs.
79        let indices = if let Some(transparent_index) = self.lut.transparent_index() {
80            self.simd.select_u32x8(
81                pos.simd_eq(pos),
82                indices,
83                u32x8::splat(self.simd, transparent_index as u32),
84            )
85        } else {
86            indices
87        };
88
89        Some(indices)
90    }
91}
92
93impl<S: Simd> crate::fine::Painter for GradientPainter<'_, S> {
94    fn paint_u8(&mut self, buf: &mut [u8]) {
95        self.simd.vectorize(
96            #[inline(always)]
97            || {
98                for chunk in buf.chunks_exact_mut(32) {
99                    let indices = self.next().unwrap();
100
101                    let rgbas_1: [f32x4<S>; 4] = core::array::from_fn(|i| {
102                        f32x4::from_slice(self.simd, &self.lut.get(indices[i] as usize))
103                    });
104                    let rgbas_1 = self.simd.combine_f32x8(
105                        self.simd.combine_f32x4(rgbas_1[0], rgbas_1[1]),
106                        self.simd.combine_f32x4(rgbas_1[2], rgbas_1[3]),
107                    );
108                    let rgbas_1 = u8x16::from_f32(self.simd, rgbas_1);
109                    chunk[..16].copy_from_slice(rgbas_1.as_slice());
110
111                    let rgbas_2: [f32x4<S>; 4] = core::array::from_fn(|i| {
112                        f32x4::from_slice(self.simd, &self.lut.get(indices[i + 4] as usize))
113                    });
114                    let rgbas_2 = self.simd.combine_f32x8(
115                        self.simd.combine_f32x4(rgbas_2[0], rgbas_2[1]),
116                        self.simd.combine_f32x4(rgbas_2[2], rgbas_2[3]),
117                    );
118                    let rgbas_2 = u8x16::from_f32(self.simd, rgbas_2);
119                    chunk[16..].copy_from_slice(rgbas_2.as_slice());
120                }
121            },
122        );
123    }
124
125    fn paint_f32(&mut self, buf: &mut [f32]) {
126        self.simd.vectorize(
127            #[inline(always)]
128            || {
129                for chunk in buf.chunks_exact_mut(32) {
130                    let indices = self.next().unwrap();
131                    chunk[0..4].copy_from_slice(&self.lut.get(indices[0] as usize));
132                    chunk[4..8].copy_from_slice(&self.lut.get(indices[1] as usize));
133                    chunk[8..12].copy_from_slice(&self.lut.get(indices[2] as usize));
134                    chunk[12..16].copy_from_slice(&self.lut.get(indices[3] as usize));
135                    chunk[16..20].copy_from_slice(&self.lut.get(indices[4] as usize));
136                    chunk[20..24].copy_from_slice(&self.lut.get(indices[5] as usize));
137                    chunk[24..28].copy_from_slice(&self.lut.get(indices[6] as usize));
138                    chunk[28..32].copy_from_slice(&self.lut.get(indices[7] as usize));
139                }
140            },
141        );
142    }
143}
144
145#[inline(always)]
146pub(crate) fn apply_extend<S: Simd>(val: f32x8<S>, extend: peniko::Extend) -> f32x8<S> {
147    match extend {
148        peniko::Extend::Pad => val.max(0.0).min(1.0),
149        peniko::Extend::Repeat => (val - val.floor()).fract(),
150        // See <https://github.com/google/skia/blob/220738774f7a0ce4a6c7bd17519a336e5e5dea5b/src/opts/SkRasterPipeline_opts.h#L6472-L6475>
151        peniko::Extend::Reflect => ((val - 1.0) - 2.0 * ((val - 1.0) * 0.5).floor() - 1.0)
152            .abs()
153            .max(0.0)
154            .min(1.0),
155    }
156}
157
/// A gradient kind that can evaluate eight positions at once.
///
/// `calculate_t_vals` calls this once per chunk of eight positions and stores the returned
/// lanes in its output buffer.
pub(crate) trait SimdGradientKind<S: Simd> {
    /// Returns one value per lane for the eight positions whose coordinates are given by
    /// the corresponding lanes of `x_pos` and `y_pos`.
    // NOTE(review): presumably the result is the gradient parameter `t` for each position —
    // confirm against the `linear`, `radial` and `sweep` implementations.
    fn cur_pos(&self, x_pos: f32x8<S>, y_pos: f32x8<S>) -> f32x8<S>;
}