vello_cpu/fine/common/
image.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::fine::macros::{f32x16_painter, u8x16_painter};
5use crate::fine::{PosExt, Splat4thExt, u8_to_f32};
6use crate::kurbo::Point;
7use crate::peniko::ImageQuality;
8use vello_common::encode::EncodedImage;
9use vello_common::fearless_simd::{Bytes, Simd, SimdBase, SimdFloat, f32x4, f32x16, u8x16, u32x4};
10use vello_common::pixmap::Pixmap;
11use vello_common::simd::element_wise_splat;
12
13/// A painter for nearest-neighbor images with no skewing.
14#[derive(Debug)]
15pub(crate) struct PlainNNImagePainter<'a, S: Simd> {
16    data: ImagePainterData<'a, S>,
17    y_positions: f32x4<S>,
18    cur_x_pos: f32x4<S>,
19    advance: f32,
20    simd: S,
21}
22
23impl<'a, S: Simd> PlainNNImagePainter<'a, S> {
24    pub(crate) fn new(
25        simd: S,
26        image: &'a EncodedImage,
27        pixmap: &'a Pixmap,
28        start_x: u16,
29        start_y: u16,
30    ) -> Self {
31        let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);
32
33        let y_positions = extend(
34            simd,
35            f32x4::splat_pos(
36                simd,
37                data.cur_pos.y as f32,
38                data.x_advances.1,
39                data.y_advances.1,
40            ),
41            image.sampler.y_extend,
42            data.height,
43            data.height_inv,
44        );
45
46        let cur_x_pos = f32x4::splat_pos(
47            simd,
48            data.cur_pos.x as f32,
49            data.x_advances.0,
50            data.y_advances.0,
51        );
52
53        Self {
54            data,
55            advance: image.x_advance.x as f32,
56            y_positions,
57            cur_x_pos,
58            simd,
59        }
60    }
61}
62
63impl<S: Simd> Iterator for PlainNNImagePainter<'_, S> {
64    type Item = u8x16<S>;
65
66    #[inline(always)]
67    fn next(&mut self) -> Option<Self::Item> {
68        let x_pos = extend(
69            self.simd,
70            self.cur_x_pos,
71            self.data.image.sampler.x_extend,
72            self.data.width,
73            self.data.width_inv,
74        );
75
76        let samples = sample(self.simd, &self.data, x_pos, self.y_positions);
77
78        self.cur_x_pos += self.advance;
79
80        Some(samples)
81    }
82}
83
84u8x16_painter!(PlainNNImagePainter<'_, S>);
85
86/// A painter for nearest-neighbor images with arbitrary transforms.
87#[derive(Debug)]
88pub(crate) struct NNImagePainter<'a, S: Simd> {
89    data: ImagePainterData<'a, S>,
90    simd: S,
91}
92
93impl<'a, S: Simd> NNImagePainter<'a, S> {
94    pub(crate) fn new(
95        simd: S,
96        image: &'a EncodedImage,
97        pixmap: &'a Pixmap,
98        start_x: u16,
99        start_y: u16,
100    ) -> Self {
101        let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);
102
103        Self { data, simd }
104    }
105}
106
107impl<S: Simd> Iterator for NNImagePainter<'_, S> {
108    type Item = u8x16<S>;
109
110    fn next(&mut self) -> Option<Self::Item> {
111        let x_positions = extend(
112            self.simd,
113            f32x4::splat_pos(
114                self.simd,
115                self.data.cur_pos.x as f32,
116                self.data.x_advances.0,
117                self.data.y_advances.0,
118            ),
119            self.data.image.sampler.x_extend,
120            self.data.width,
121            self.data.width_inv,
122        );
123
124        let y_positions = extend(
125            self.simd,
126            f32x4::splat_pos(
127                self.simd,
128                self.data.cur_pos.y as f32,
129                self.data.x_advances.1,
130                self.data.y_advances.1,
131            ),
132            self.data.image.sampler.y_extend,
133            self.data.height,
134            self.data.height_inv,
135        );
136
137        let samples = sample(self.simd, &self.data, x_positions, y_positions);
138
139        self.data.cur_pos += self.data.image.x_advance;
140
141        Some(samples)
142    }
143}
144
145u8x16_painter!(NNImagePainter<'_, S>);
146
147/// A painter for images with bilinear or bicubic filtering.
148#[derive(Debug)]
149pub(crate) struct FilteredImagePainter<'a, S: Simd> {
150    data: ImagePainterData<'a, S>,
151    simd: S,
152}
153
154impl<'a, S: Simd> FilteredImagePainter<'a, S> {
155    pub(crate) fn new(
156        simd: S,
157        image: &'a EncodedImage,
158        pixmap: &'a Pixmap,
159        start_x: u16,
160        start_y: u16,
161    ) -> Self {
162        let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);
163
164        Self { data, simd }
165    }
166}
167
168impl<S: Simd> Iterator for FilteredImagePainter<'_, S> {
169    type Item = f32x16<S>;
170
171    fn next(&mut self) -> Option<Self::Item> {
172        let x_positions = f32x4::splat_pos(
173            self.simd,
174            self.data.cur_pos.x as f32,
175            self.data.x_advances.0,
176            self.data.y_advances.0,
177        );
178
179        let y_positions = f32x4::splat_pos(
180            self.simd,
181            self.data.cur_pos.y as f32,
182            self.data.x_advances.1,
183            self.data.y_advances.1,
184        );
185
186        // We have two versions of filtering: `Medium` (bilinear filtering) and
187        // `High` (bicubic filtering).
188
189        // In bilinear filtering, we sample the pixels of the rectangle that spans the
190        // locations (-0.5, -0.5) and (0.5, 0.5), and weight them by the fractional
191        // x/y position using simple linear interpolation in both dimensions.
192        // In bicubic filtering, we instead span a 4x4 grid around the
193        // center of the location we are sampling, and sample those points
194        // using a cubic filter to weight each location's contribution.
195
196        // Note that this `fract` has different behavior for negative numbers than the normal,
197        // one.
198        #[inline(always)]
199        fn fract<S: Simd>(val: f32x4<S>) -> f32x4<S> {
200            val - val.floor()
201        }
202
203        let x_fract = fract(x_positions + 0.5);
204        let y_fract = fract(y_positions + 0.5);
205
206        let mut interpolated_color = f32x16::splat(self.simd, 0.0);
207
208        let sample = |x_pos: f32x4<S>, y_pos: f32x4<S>| {
209            u8_to_f32(sample(self.simd, &self.data, x_pos, y_pos))
210        };
211
212        macro_rules! extend_x {
213            ($idx:expr,$offsets:expr) => {
214                extend(
215                    self.simd,
216                    x_positions + $offsets[$idx],
217                    self.data.image.sampler.y_extend,
218                    self.data.width,
219                    self.data.width_inv,
220                )
221            };
222        }
223
224        macro_rules! extend_y {
225            ($idx:expr,$offsets:expr) => {
226                extend(
227                    self.simd,
228                    y_positions + $offsets[$idx],
229                    self.data.image.sampler.y_extend,
230                    self.data.height,
231                    self.data.height_inv,
232                )
233            };
234        }
235
236        match self.data.image.sampler.quality {
237            ImageQuality::Low => unreachable!(),
238            ImageQuality::Medium => {
239                // <https://github.com/google/skia/blob/220738774f7a0ce4a6c7bd17519a336e5e5dea5b/src/opts/SkRasterPipeline_opts.h#L5039-L5078>
240                let cx = [1.0 - x_fract, x_fract];
241                let cy = [1.0 - y_fract, y_fract];
242
243                // Note that the sum of all cx*cy combinations also yields 1.0 again
244                // (modulo some floating point number impreciseness), ensuring the
245                // colors stay in range.
246
247                const OFFSETS: [f32; 2] = [-0.5, 0.5];
248
249                let x_positions = [extend_x!(0, OFFSETS), extend_x!(1, OFFSETS)];
250
251                let y_positions = [extend_y!(0, OFFSETS), extend_y!(1, OFFSETS)];
252
253                // We sample the corners of rectangle that covers our current position.
254                for x_idx in 0..2 {
255                    let x_positions = x_positions[x_idx];
256
257                    for y_idx in 0..2 {
258                        let y_positions = y_positions[y_idx];
259                        let color_sample = sample(x_positions, y_positions);
260                        let w = element_wise_splat(self.simd, cx[x_idx] * cy[y_idx]);
261
262                        interpolated_color = w.madd(color_sample, interpolated_color);
263                    }
264                }
265
266                interpolated_color *= f32x16::splat(self.simd, 1.0 / 255.0);
267            }
268            ImageQuality::High => {
269                // Compare to <https://github.com/google/skia/blob/84ff153b0093fc83f6c77cd10b025c06a12c5604/src/opts/SkRasterPipeline_opts.h#L5030-L5075>.
270                let cx = weights(self.simd, x_fract);
271                let cy = weights(self.simd, y_fract);
272
273                const OFFSETS: [f32; 4] = [-1.5, -0.5, 0.5, 1.5];
274
275                let x_positions = [
276                    extend_x!(0, OFFSETS),
277                    extend_x!(1, OFFSETS),
278                    extend_x!(2, OFFSETS),
279                    extend_x!(3, OFFSETS),
280                ];
281
282                let y_positions = [
283                    extend_y!(0, OFFSETS),
284                    extend_y!(1, OFFSETS),
285                    extend_y!(2, OFFSETS),
286                    extend_y!(3, OFFSETS),
287                ];
288
289                // Note in particular that it is guaranteed that, similarly to bilinear filtering,
290                // the sum of all cx*cy is 1 (modulo some edge cases).
291
292                // We sample the 4x4 grid around the position we are currently looking at.
293                for x_idx in 0..4 {
294                    let x_positions = x_positions[x_idx];
295                    for y_idx in 0..4 {
296                        let y_positions = y_positions[y_idx];
297
298                        let color_sample = sample(x_positions, y_positions);
299                        let w = element_wise_splat(self.simd, cx[x_idx] * cy[y_idx]);
300
301                        interpolated_color = w.madd(color_sample, interpolated_color);
302                    }
303                }
304
305                interpolated_color *= f32x16::splat(self.simd, 1.0 / 255.0);
306
307                let alphas = interpolated_color.splat_4th();
308
309                // Due to the nature of the cubic filter, it can happen in certain situations
310                // that one of the color components ends up with a higher value than the
311                // alpha component, which isn't permissible because the color is
312                // premultiplied and would lead to overflows when doing source over
313                // compositing with u8-based values. Because of this, we need to clamp
314                // to the alpha value.
315                interpolated_color = interpolated_color
316                    .min(f32x16::splat(self.simd, 1.0))
317                    .max(f32x16::splat(self.simd, 0.0))
318                    .min(alphas);
319            }
320        }
321
322        self.data.cur_pos += self.data.image.x_advance;
323
324        Some(interpolated_color)
325    }
326}
327
328f32x16_painter!(FilteredImagePainter<'_, S>);
329
330/// Common data used by different image painters
331#[derive(Debug)]
332pub(crate) struct ImagePainterData<'a, S: Simd> {
333    pub(crate) cur_pos: Point,
334    pub(crate) image: &'a EncodedImage,
335    pub(crate) pixmap: &'a Pixmap,
336    pub(crate) x_advances: (f32, f32),
337    pub(crate) y_advances: (f32, f32),
338    pub(crate) height: f32x4<S>,
339    pub(crate) height_inv: f32x4<S>,
340    pub(crate) width: f32x4<S>,
341    pub(crate) width_inv: f32x4<S>,
342    pub(crate) width_u32: u32x4<S>,
343}
344
345impl<'a, S: Simd> ImagePainterData<'a, S> {
346    pub(crate) fn new(
347        simd: S,
348        image: &'a EncodedImage,
349        pixmap: &'a Pixmap,
350        start_x: u16,
351        start_y: u16,
352    ) -> Self {
353        let width = pixmap.width() as f32;
354        let height = pixmap.height() as f32;
355        let start_pos = image.transform * Point::new(f64::from(start_x), f64::from(start_y));
356
357        let width_inv = f32x4::splat(simd, 1.0 / width);
358        let height_inv = f32x4::splat(simd, 1.0 / height);
359        let width = f32x4::splat(simd, width);
360        let width_u32 = u32x4::splat(simd, pixmap.width() as u32);
361        let height = f32x4::splat(simd, height);
362
363        let x_advances = (image.x_advance.x as f32, image.x_advance.y as f32);
364        let y_advances = (image.y_advance.x as f32, image.y_advance.y as f32);
365
366        Self {
367            cur_pos: start_pos,
368            pixmap,
369            x_advances,
370            y_advances,
371            image,
372            width,
373            height,
374            width_u32,
375            width_inv,
376            height_inv,
377        }
378    }
379}
380
381#[inline(always)]
382pub(crate) fn sample<S: Simd>(
383    simd: S,
384    data: &ImagePainterData<'_, S>,
385    x_positions: f32x4<S>,
386    y_positions: f32x4<S>,
387) -> u8x16<S> {
388    let idx = x_positions.cvt_u32() + y_positions.cvt_u32() * data.width_u32;
389
390    u32x4::from_slice(
391        simd,
392        &[
393            data.pixmap.sample_idx(idx[0]).to_u32(),
394            data.pixmap.sample_idx(idx[1]).to_u32(),
395            data.pixmap.sample_idx(idx[2]).to_u32(),
396            data.pixmap.sample_idx(idx[3]).to_u32(),
397        ],
398    )
399    .reinterpret_u8()
400}
401
402#[inline(always)]
403pub(crate) fn extend<S: Simd>(
404    simd: S,
405    val: f32x4<S>,
406    extend: crate::peniko::Extend,
407    max: f32x4<S>,
408    inv_max: f32x4<S>,
409) -> f32x4<S> {
410    // We cannot chose f32::EPSILON here because for example 30.0 - f32::EPSILON is still 30.0.
411    // This bias should be large enough for all numbers that we support (i.e. <= u16::MAX).
412    let bias = f32x4::splat(simd, 0.01);
413
414    match extend {
415        // Note that max should be exclusive, so subtract a small bias to enforce that.
416        // Otherwise, we might sample out-of-bounds pixels.
417        crate::peniko::Extend::Pad => val.min(max - bias).max(f32x4::splat(simd, 0.0)),
418        crate::peniko::Extend::Repeat => {
419            // floor := (val * inv_max).floor() * max is the nearest multiple of `max` below val.
420            max.madd(-(val * inv_max).floor(), val)
421                // In certain edge cases, we might still end up with a higher number.
422                .min(max - 1.0)
423        }
424        // <https://github.com/google/skia/blob/220738774f7a0ce4a6c7bd17519a336e5e5dea5b/src/opts/SkRasterPipeline_opts.h#L3274-L3290>
425        crate::peniko::Extend::Reflect => {
426            let u = val
427                - (val * inv_max * f32x4::splat(simd, 0.5)).floor() * f32x4::splat(simd, 2.0) * max;
428            let s = (u * inv_max).floor();
429            let m = u - f32x4::splat(simd, 2.0) * s * (u - max);
430
431            let bias_in_ulps = s.trunc();
432
433            let m_bits = u32x4::from_bytes(m.to_bytes());
434            // This would yield NaN if `m` is 0 and `bias_in_ulps` > 0, but since
435            // our `max` is always an integer number, u and s must also be an integer number
436            // and thus `m_bits` must be 0.
437            // Note that this is a wrapping sub!
438            let biased_bits = m_bits - bias_in_ulps.cvt_u32();
439            f32x4::from_bytes(biased_bits.to_bytes())
440                // In certain edge cases, we might still end up with a higher number.
441                .min(max - 1.0)
442        }
443    }
444}
445
446/// Calculate the weights for a single fractional value.
447fn weights<S: Simd>(simd: S, fract: f32x4<S>) -> [f32x4<S>; 4] {
448    simd.vectorize(
449        #[inline(always)]
450        || {
451            let s = fract.simd;
452            const MF: [[f32; 4]; 4] = mf_resampler();
453
454            [
455                single_weight(
456                    fract,
457                    f32x4::splat(s, MF[0][0]),
458                    f32x4::splat(s, MF[0][1]),
459                    f32x4::splat(s, MF[0][2]),
460                    f32x4::splat(s, MF[0][3]),
461                ),
462                single_weight(
463                    fract,
464                    f32x4::splat(s, MF[1][0]),
465                    f32x4::splat(s, MF[1][1]),
466                    f32x4::splat(s, MF[1][2]),
467                    f32x4::splat(s, MF[1][3]),
468                ),
469                single_weight(
470                    fract,
471                    f32x4::splat(s, MF[2][0]),
472                    f32x4::splat(s, MF[2][1]),
473                    f32x4::splat(s, MF[2][2]),
474                    f32x4::splat(s, MF[2][3]),
475                ),
476                single_weight(
477                    fract,
478                    f32x4::splat(s, MF[3][0]),
479                    f32x4::splat(s, MF[3][1]),
480                    f32x4::splat(s, MF[3][2]),
481                    f32x4::splat(s, MF[3][3]),
482                ),
483            ]
484        },
485    )
486}
487
488/// Calculate a weight based on the fractional value t and the cubic coefficients.
489#[inline(always)]
490fn single_weight<S: Simd>(
491    t: f32x4<S>,
492    a: f32x4<S>,
493    b: f32x4<S>,
494    c: f32x4<S>,
495    d: f32x4<S>,
496) -> f32x4<S> {
497    t.madd(d, c).madd(t, b).madd(t, a)
498}
499
500/// Mitchell filter with the variables B = 1/3 and C = 1/3.
501const fn mf_resampler() -> [[f32; 4]; 4] {
502    cubic_resampler(1.0 / 3.0, 1.0 / 3.0)
503}
504
505/// Cubic resampling logic is borrowed from Skia. See
506/// <https://github.com/google/skia/blob/220fef664978643a47d4559ae9e762b91aba534a/include/core/SkSamplingOptions.h#L33-L50>
507/// for some links to understand how this works. In principle, this macro allows us to define a
508/// resampler kernel based on two variables B and C which can be between 0 and 1, allowing to
509/// change some properties of the cubic interpolation kernel.
510///
511/// As mentioned above, cubic resampling consists of sampling the 16 surrounding pixels of the
512/// target point and interpolating them with a cubic filter.
513/// The generated matrix is 4x4 and represent the coefficients of the cubic function used to
514/// calculate weights based on the `x_fract` and `y_fract` of the location we are looking at.
515const fn cubic_resampler(b: f32, c: f32) -> [[f32; 4]; 4] {
516    [
517        [
518            (1.0 / 6.0) * b,
519            -(3.0 / 6.0) * b - c,
520            (3.0 / 6.0) * b + 2.0 * c,
521            -(1.0 / 6.0) * b - c,
522        ],
523        [
524            1.0 - (2.0 / 6.0) * b,
525            0.0,
526            -3.0 + (12.0 / 6.0) * b + c,
527            2.0 - (9.0 / 6.0) * b - c,
528        ],
529        [
530            (1.0 / 6.0) * b,
531            (3.0 / 6.0) * b + c,
532            3.0 - (15.0 / 6.0) * b - 2.0 * c,
533            -2.0 + (9.0 / 6.0) * b + c,
534        ],
535        [0.0, 0.0, -c, (1.0 / 6.0) * b + c],
536    ]
537}
538
539#[cfg(test)]
540mod tests {
541    use super::*;
542    use vello_common::fearless_simd::Fallback;
543
544    #[test]
545    fn extend_overflow() {
546        let simd = Fallback::new();
547        let max = f32x4::splat(simd, 128.0);
548        let max_inv = 1.0 / max;
549
550        let num = f32x4::splat(simd, 127.00001);
551        let res = extend(simd, num, crate::peniko::Extend::Repeat, max, max_inv);
552
553        assert!(res[0] <= 127.0);
554    }
555}
vello_cpu/fine/common/image.rs

vello_cpu/fine/common/
image.rs