Skip to main content

vello_cpu/fine/common/
image.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::fine::macros::{f32x16_painter, u8x16_painter};
5use crate::fine::{PosExt, Splat4thExt, u8_to_f32};
6use crate::kurbo::Point;
7use crate::peniko::ImageQuality;
8use vello_common::encode::EncodedImage;
9use vello_common::fearless_simd::{Bytes, Simd, SimdBase, SimdFloat, f32x4, f32x16, u8x16, u32x4};
10use vello_common::pixmap::Pixmap;
11use vello_common::simd::element_wise_splat;
12
13/// A painter for nearest-neighbor images with no skewing.
14#[derive(Debug)]
15pub(crate) struct PlainNNImagePainter<'a, S: Simd> {
16    data: ImagePainterData<'a, S>,
17    y_positions: f32x4<S>,
18    cur_x_pos: f32x4<S>,
19    advance: f32,
20    simd: S,
21}
22
23impl<'a, S: Simd> PlainNNImagePainter<'a, S> {
24    pub(crate) fn new(
25        simd: S,
26        image: &'a EncodedImage,
27        pixmap: &'a Pixmap,
28        start_x: u16,
29        start_y: u16,
30    ) -> Self {
31        let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);
32
33        let y_positions = extend(
34            simd,
35            f32x4::splat_pos(
36                simd,
37                data.cur_pos.y as f32,
38                data.x_advances.1,
39                data.y_advances.1,
40            ),
41            image.sampler.y_extend,
42            data.height,
43            data.height_inv,
44        );
45
46        let cur_x_pos = f32x4::splat_pos(
47            simd,
48            data.cur_pos.x as f32,
49            data.x_advances.0,
50            data.y_advances.0,
51        );
52
53        Self {
54            data,
55            advance: image.x_advance.x as f32,
56            y_positions,
57            cur_x_pos,
58            simd,
59        }
60    }
61}
62
63impl<S: Simd> Iterator for PlainNNImagePainter<'_, S> {
64    type Item = u8x16<S>;
65
66    #[inline(always)]
67    fn next(&mut self) -> Option<Self::Item> {
68        let x_pos = extend(
69            self.simd,
70            self.cur_x_pos,
71            self.data.image.sampler.x_extend,
72            self.data.width,
73            self.data.width_inv,
74        );
75
76        let samples = sample(self.simd, &self.data, x_pos, self.y_positions);
77
78        self.cur_x_pos += self.advance;
79
80        Some(samples)
81    }
82}
83
84u8x16_painter!(PlainNNImagePainter<'_, S>);
85
86/// A painter for nearest-neighbor images with arbitrary transforms.
87#[derive(Debug)]
88pub(crate) struct NNImagePainter<'a, S: Simd> {
89    data: ImagePainterData<'a, S>,
90    simd: S,
91}
92
93impl<'a, S: Simd> NNImagePainter<'a, S> {
94    pub(crate) fn new(
95        simd: S,
96        image: &'a EncodedImage,
97        pixmap: &'a Pixmap,
98        start_x: u16,
99        start_y: u16,
100    ) -> Self {
101        let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);
102
103        Self { data, simd }
104    }
105}
106
107impl<S: Simd> Iterator for NNImagePainter<'_, S> {
108    type Item = u8x16<S>;
109
110    fn next(&mut self) -> Option<Self::Item> {
111        let x_positions = extend(
112            self.simd,
113            f32x4::splat_pos(
114                self.simd,
115                self.data.cur_pos.x as f32,
116                self.data.x_advances.0,
117                self.data.y_advances.0,
118            ),
119            self.data.image.sampler.x_extend,
120            self.data.width,
121            self.data.width_inv,
122        );
123
124        let y_positions = extend(
125            self.simd,
126            f32x4::splat_pos(
127                self.simd,
128                self.data.cur_pos.y as f32,
129                self.data.x_advances.1,
130                self.data.y_advances.1,
131            ),
132            self.data.image.sampler.y_extend,
133            self.data.height,
134            self.data.height_inv,
135        );
136
137        let samples = sample(self.simd, &self.data, x_positions, y_positions);
138
139        self.data.cur_pos += self.data.image.x_advance;
140
141        Some(samples)
142    }
143}
144
145u8x16_painter!(NNImagePainter<'_, S>);
146
147/// A painter for images with bilinear or bicubic filtering.
148#[derive(Debug)]
149pub(crate) struct FilteredImagePainter<'a, S: Simd> {
150    data: ImagePainterData<'a, S>,
151    simd: S,
152}
153
154impl<'a, S: Simd> FilteredImagePainter<'a, S> {
155    pub(crate) fn new(
156        simd: S,
157        image: &'a EncodedImage,
158        pixmap: &'a Pixmap,
159        start_x: u16,
160        start_y: u16,
161    ) -> Self {
162        let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);
163
164        Self { data, simd }
165    }
166}
167
168impl<S: Simd> Iterator for FilteredImagePainter<'_, S> {
169    type Item = f32x16<S>;
170
171    fn next(&mut self) -> Option<Self::Item> {
172        let x_positions = f32x4::splat_pos(
173            self.simd,
174            self.data.cur_pos.x as f32,
175            self.data.x_advances.0,
176            self.data.y_advances.0,
177        );
178
179        let y_positions = f32x4::splat_pos(
180            self.simd,
181            self.data.cur_pos.y as f32,
182            self.data.x_advances.1,
183            self.data.y_advances.1,
184        );
185
186        // We have two versions of filtering: `Medium` (bilinear filtering) and
187        // `High` (bicubic filtering).
188
189        // In bilinear filtering, we sample the pixels of the rectangle that spans the
190        // locations (-0.5, -0.5) and (0.5, 0.5), and weight them by the fractional
191        // x/y position using simple linear interpolation in both dimensions.
192        // In bicubic filtering, we instead span a 4x4 grid around the
193        // center of the location we are sampling, and sample those points
194        // using a cubic filter to weight each location's contribution.
195
196        let x_fract = fract_floor(x_positions + 0.5);
197        let y_fract = fract_floor(y_positions + 0.5);
198
199        let mut interpolated_color = f32x16::splat(self.simd, 0.0);
200
201        let sample = |x_pos: f32x4<S>, y_pos: f32x4<S>| {
202            u8_to_f32(sample(self.simd, &self.data, x_pos, y_pos))
203        };
204
205        macro_rules! extend_x {
206            ($idx:expr,$offsets:expr) => {
207                extend(
208                    self.simd,
209                    x_positions + $offsets[$idx],
210                    self.data.image.sampler.y_extend,
211                    self.data.width,
212                    self.data.width_inv,
213                )
214            };
215        }
216
217        macro_rules! extend_y {
218            ($idx:expr,$offsets:expr) => {
219                extend(
220                    self.simd,
221                    y_positions + $offsets[$idx],
222                    self.data.image.sampler.y_extend,
223                    self.data.height,
224                    self.data.height_inv,
225                )
226            };
227        }
228
229        match self.data.image.sampler.quality {
230            ImageQuality::Low => unreachable!(),
231            ImageQuality::Medium => {
232                // <https://github.com/google/skia/blob/220738774f7a0ce4a6c7bd17519a336e5e5dea5b/src/opts/SkRasterPipeline_opts.h#L5039-L5078>
233                let cx = [1.0 - x_fract, x_fract];
234                let cy = [1.0 - y_fract, y_fract];
235
236                // Note that the sum of all cx*cy combinations also yields 1.0 again
237                // (modulo some floating point number impreciseness), ensuring the
238                // colors stay in range.
239
240                const OFFSETS: [f32; 2] = [-0.5, 0.5];
241
242                let x_positions = [extend_x!(0, OFFSETS), extend_x!(1, OFFSETS)];
243
244                let y_positions = [extend_y!(0, OFFSETS), extend_y!(1, OFFSETS)];
245
246                // We sample the corners of rectangle that covers our current position.
247                for x_idx in 0..2 {
248                    let x_positions = x_positions[x_idx];
249
250                    for y_idx in 0..2 {
251                        let y_positions = y_positions[y_idx];
252                        let color_sample = sample(x_positions, y_positions);
253                        let w = element_wise_splat(self.simd, cx[x_idx] * cy[y_idx]);
254
255                        interpolated_color = w.madd(color_sample, interpolated_color);
256                    }
257                }
258
259                interpolated_color *= f32x16::splat(self.simd, 1.0 / 255.0);
260            }
261            ImageQuality::High => {
262                // Compare to <https://github.com/google/skia/blob/84ff153b0093fc83f6c77cd10b025c06a12c5604/src/opts/SkRasterPipeline_opts.h#L5030-L5075>.
263                let cx = weights(self.simd, x_fract);
264                let cy = weights(self.simd, y_fract);
265
266                const OFFSETS: [f32; 4] = [-1.5, -0.5, 0.5, 1.5];
267
268                let x_positions = [
269                    extend_x!(0, OFFSETS),
270                    extend_x!(1, OFFSETS),
271                    extend_x!(2, OFFSETS),
272                    extend_x!(3, OFFSETS),
273                ];
274
275                let y_positions = [
276                    extend_y!(0, OFFSETS),
277                    extend_y!(1, OFFSETS),
278                    extend_y!(2, OFFSETS),
279                    extend_y!(3, OFFSETS),
280                ];
281
282                // Note in particular that it is guaranteed that, similarly to bilinear filtering,
283                // the sum of all cx*cy is 1 (modulo some edge cases).
284
285                // We sample the 4x4 grid around the position we are currently looking at.
286                for x_idx in 0..4 {
287                    let x_positions = x_positions[x_idx];
288                    for y_idx in 0..4 {
289                        let y_positions = y_positions[y_idx];
290
291                        let color_sample = sample(x_positions, y_positions);
292                        let w = element_wise_splat(self.simd, cx[x_idx] * cy[y_idx]);
293
294                        interpolated_color = w.madd(color_sample, interpolated_color);
295                    }
296                }
297
298                interpolated_color *= f32x16::splat(self.simd, 1.0 / 255.0);
299
300                let alphas = interpolated_color.splat_4th();
301
302                // Due to the nature of the cubic filter, it can happen in certain situations
303                // that one of the color components ends up with a higher value than the
304                // alpha component, which isn't permissible because the color is
305                // premultiplied and would lead to overflows when doing source over
306                // compositing with u8-based values. Because of this, we need to clamp
307                // to the alpha value.
308                interpolated_color = interpolated_color
309                    .min(f32x16::splat(self.simd, 1.0))
310                    .max(f32x16::splat(self.simd, 0.0))
311                    .min(alphas);
312            }
313        }
314
315        self.data.cur_pos += self.data.image.x_advance;
316
317        Some(interpolated_color)
318    }
319}
320
321f32x16_painter!(FilteredImagePainter<'_, S>);
322
323/// Computes the positive fractional part of a value: `val - val.floor()`.
324///
325/// Unlike `f32::fract()`, this always returns a value in [0, 1),
326/// even for negative inputs.
327#[inline(always)]
328pub(crate) fn fract_floor<S: Simd>(val: f32x4<S>) -> f32x4<S> {
329    val - val.floor()
330}
331
332/// Common data used by different image painters
333#[derive(Debug)]
334pub(crate) struct ImagePainterData<'a, S: Simd> {
335    pub(crate) cur_pos: Point,
336    pub(crate) image: &'a EncodedImage,
337    pub(crate) pixmap: &'a Pixmap,
338    pub(crate) x_advances: (f32, f32),
339    pub(crate) y_advances: (f32, f32),
340    pub(crate) height: f32x4<S>,
341    pub(crate) height_inv: f32x4<S>,
342    pub(crate) width: f32x4<S>,
343    pub(crate) width_inv: f32x4<S>,
344    pub(crate) width_u32: u32x4<S>,
345}
346
347impl<'a, S: Simd> ImagePainterData<'a, S> {
348    pub(crate) fn new(
349        simd: S,
350        image: &'a EncodedImage,
351        pixmap: &'a Pixmap,
352        start_x: u16,
353        start_y: u16,
354    ) -> Self {
355        let width = pixmap.width() as f32;
356        let height = pixmap.height() as f32;
357        let start_pos = image.transform * Point::new(f64::from(start_x), f64::from(start_y));
358
359        let width_inv = f32x4::splat(simd, 1.0 / width);
360        let height_inv = f32x4::splat(simd, 1.0 / height);
361        let width = f32x4::splat(simd, width);
362        let width_u32 = u32x4::splat(simd, pixmap.width() as u32);
363        let height = f32x4::splat(simd, height);
364
365        let x_advances = (image.x_advance.x as f32, image.x_advance.y as f32);
366        let y_advances = (image.y_advance.x as f32, image.y_advance.y as f32);
367
368        Self {
369            cur_pos: start_pos,
370            pixmap,
371            x_advances,
372            y_advances,
373            image,
374            width,
375            height,
376            width_u32,
377            width_inv,
378            height_inv,
379        }
380    }
381}
382
383#[inline(always)]
384pub(crate) fn sample<S: Simd>(
385    simd: S,
386    data: &ImagePainterData<'_, S>,
387    x_positions: f32x4<S>,
388    y_positions: f32x4<S>,
389) -> u8x16<S> {
390    let idx = x_positions.to_int::<u32x4<S>>() + y_positions.to_int::<u32x4<S>>() * data.width_u32;
391
392    u32x4::from_slice(
393        simd,
394        &[
395            data.pixmap.sample_idx(idx[0]).to_u32(),
396            data.pixmap.sample_idx(idx[1]).to_u32(),
397            data.pixmap.sample_idx(idx[2]).to_u32(),
398            data.pixmap.sample_idx(idx[3]).to_u32(),
399        ],
400    )
401    .to_bytes()
402}
403
404#[inline(always)]
405pub(crate) fn extend<S: Simd>(
406    simd: S,
407    val: f32x4<S>,
408    extend: crate::peniko::Extend,
409    max: f32x4<S>,
410    inv_max: f32x4<S>,
411) -> f32x4<S> {
412    // We cannot chose f32::EPSILON here because for example 30.0 - f32::EPSILON is still 30.0.
413    // This bias should be large enough for all numbers that we support (i.e. <= u16::MAX).
414    let bias = f32x4::splat(simd, 0.01);
415
416    match extend {
417        // Note that max should be exclusive, so subtract a small bias to enforce that.
418        // Otherwise, we might sample out-of-bounds pixels.
419        crate::peniko::Extend::Pad => val.min(max - bias).max(f32x4::splat(simd, 0.0)),
420        crate::peniko::Extend::Repeat => {
421            // floor := (val * inv_max).floor() * max is the nearest multiple of `max` below val.
422            max.madd(-(val * inv_max).floor(), val)
423                // In certain edge cases, we might still end up with a higher number.
424                .min(max - 1.0)
425        }
426        // <https://github.com/google/skia/blob/220738774f7a0ce4a6c7bd17519a336e5e5dea5b/src/opts/SkRasterPipeline_opts.h#L3274-L3290>
427        crate::peniko::Extend::Reflect => {
428            let u = val
429                - (val * inv_max * f32x4::splat(simd, 0.5)).floor() * f32x4::splat(simd, 2.0) * max;
430            let s = (u * inv_max).floor();
431            let m = u - f32x4::splat(simd, 2.0) * s * (u - max);
432
433            let bias_in_ulps = s.trunc();
434
435            let m_bits = u32x4::from_bytes(m.to_bytes());
436            // This would yield NaN if `m` is 0 and `bias_in_ulps` > 0, but since
437            // our `max` is always an integer number, u and s must also be an integer number
438            // and thus `m_bits` must be 0.
439            // Note that this is a wrapping sub!
440            let biased_bits = m_bits - bias_in_ulps.to_int::<u32x4<S>>();
441            f32x4::from_bytes(biased_bits.to_bytes())
442                // In certain edge cases, we might still end up with a higher number.
443                .min(max - 1.0)
444        }
445    }
446}
447
448/// Calculate the weights for a single fractional value.
449fn weights<S: Simd>(simd: S, fract: f32x4<S>) -> [f32x4<S>; 4] {
450    simd.vectorize(
451        #[inline(always)]
452        || {
453            let s = fract.simd;
454            const MF: [[f32; 4]; 4] = mf_resampler();
455
456            [
457                single_weight(
458                    fract,
459                    f32x4::splat(s, MF[0][0]),
460                    f32x4::splat(s, MF[0][1]),
461                    f32x4::splat(s, MF[0][2]),
462                    f32x4::splat(s, MF[0][3]),
463                ),
464                single_weight(
465                    fract,
466                    f32x4::splat(s, MF[1][0]),
467                    f32x4::splat(s, MF[1][1]),
468                    f32x4::splat(s, MF[1][2]),
469                    f32x4::splat(s, MF[1][3]),
470                ),
471                single_weight(
472                    fract,
473                    f32x4::splat(s, MF[2][0]),
474                    f32x4::splat(s, MF[2][1]),
475                    f32x4::splat(s, MF[2][2]),
476                    f32x4::splat(s, MF[2][3]),
477                ),
478                single_weight(
479                    fract,
480                    f32x4::splat(s, MF[3][0]),
481                    f32x4::splat(s, MF[3][1]),
482                    f32x4::splat(s, MF[3][2]),
483                    f32x4::splat(s, MF[3][3]),
484                ),
485            ]
486        },
487    )
488}
489
490/// Calculate a weight based on the fractional value t and the cubic coefficients.
491#[inline(always)]
492fn single_weight<S: Simd>(
493    t: f32x4<S>,
494    a: f32x4<S>,
495    b: f32x4<S>,
496    c: f32x4<S>,
497    d: f32x4<S>,
498) -> f32x4<S> {
499    t.madd(d, c).madd(t, b).madd(t, a)
500}
501
502/// Mitchell filter with the variables B = 1/3 and C = 1/3.
503const fn mf_resampler() -> [[f32; 4]; 4] {
504    cubic_resampler(1.0 / 3.0, 1.0 / 3.0)
505}
506
507/// Cubic resampling logic is borrowed from Skia. See
508/// <https://github.com/google/skia/blob/220fef664978643a47d4559ae9e762b91aba534a/include/core/SkSamplingOptions.h#L33-L50>
509/// for some links to understand how this works. In principle, this macro allows us to define a
510/// resampler kernel based on two variables B and C which can be between 0 and 1, allowing to
511/// change some properties of the cubic interpolation kernel.
512///
513/// As mentioned above, cubic resampling consists of sampling the 16 surrounding pixels of the
514/// target point and interpolating them with a cubic filter.
515/// The generated matrix is 4x4 and represent the coefficients of the cubic function used to
516/// calculate weights based on the `x_fract` and `y_fract` of the location we are looking at.
517const fn cubic_resampler(b: f32, c: f32) -> [[f32; 4]; 4] {
518    [
519        [
520            (1.0 / 6.0) * b,
521            -(3.0 / 6.0) * b - c,
522            (3.0 / 6.0) * b + 2.0 * c,
523            -(1.0 / 6.0) * b - c,
524        ],
525        [
526            1.0 - (2.0 / 6.0) * b,
527            0.0,
528            -3.0 + (12.0 / 6.0) * b + c,
529            2.0 - (9.0 / 6.0) * b - c,
530        ],
531        [
532            (1.0 / 6.0) * b,
533            (3.0 / 6.0) * b + c,
534            3.0 - (15.0 / 6.0) * b - 2.0 * c,
535            -2.0 + (9.0 / 6.0) * b + c,
536        ],
537        [0.0, 0.0, -c, (1.0 / 6.0) * b + c],
538    ]
539}
540
541#[cfg(test)]
542mod tests {
543    use super::*;
544    use vello_common::fearless_simd::Fallback;
545
546    #[test]
547    fn extend_overflow() {
548        let simd = Fallback::new();
549        let max = f32x4::splat(simd, 128.0);
550        let max_inv = 1.0 / max;
551
552        let num = f32x4::splat(simd, 127.00001);
553        let res = extend(simd, num, crate::peniko::Extend::Repeat, max, max_inv);
554
555        assert!(res[0] <= 127.0);
556    }
557}