Skip to main content

vello_cpu/
util.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::peniko::ImageQuality;
5use vello_common::encode::EncodedImage;
6use vello_common::fearless_simd::{Simd, SimdBase, f32x4, u8x32};
7use vello_common::math::FloatExt;
8use vello_common::util::Div255Ext;
9
pub(crate) mod scalar {
    /// Approximates `val / 255` with one addition and one shift.
    ///
    /// Why this helper exists instead of writing `/ 255` directly:
    /// 1) A real division is slower than shifting + adding, and the compiler does not appear to
    ///    rewrite a division by 255 into such a sequence on its own (this was checked by
    ///    benchmarking; `/ 255` was significantly slower).
    /// 2) SIMD instruction sets usually lack integer division, so this scalar version doubles
    ///    as the baseline that vectorized code can mirror.
    /// 3) An exact replacement for an integer division by 255 exists,
    ///    `(val + 1 + (val >> 8)) >> 8`, but the variant used here is much faster
    ///    (see <https://github.com/linebender/vello/issues/904>) at the price of slightly
    ///    different rounding, which is the preferred trade-off for the high-performance
    ///    pipeline.
    ///
    /// Guarantees, all of which hold for `val <= 65279` only:
    /// - The result is exactly the ceiling of `val / 256`.
    /// - Multiples of 255 are divided exactly, i.e. `div_255(x * 255) == x`.
    /// - Whenever the result differs from `val / 255`, it is larger by exactly 1.
    ///
    /// Do not call this function with values of `65280` or greater; debug builds assert on it.
    #[inline(always)]
    pub(crate) const fn div_255(val: u16) -> u16 {
        // Adding 255 before shifting turns the floor of `val / 256` into its ceiling.
        const ROUND_UP: u16 = 255;
        // A right shift by 8 bits divides by 256.
        const SHIFT: u32 = 8;

        debug_assert!(
            val < 65280,
            "the properties of `div_255` do not hold for values of `65280` or greater"
        );

        let biased = val + ROUND_UP;
        biased >> SHIFT
    }

    #[cfg(test)]
    mod tests {
        use super::div_255;

        /// Walks the entire supported input range and checks every documented guarantee
        /// against a true integer division by 255.
        #[test]
        fn div_255_properties() {
            for val in 0_u16..=65279 {
                let exact = val / 255;
                let approx = div_255(val);

                assert!(
                    exact <= approx,
                    "In case of a discrepancy, the division should yield a value higher than the original."
                );

                let error = approx.abs_diff(exact);
                assert!(error <= 1, "Rounding error shouldn't be higher than 1.");

                if val % 255 == 0 {
                    assert_eq!(error, 0, "Division should be accurate for multiples of 255.");
                }
            }
        }
    }
}
65
/// Extension trait that adds a "normalized" multiplication to a type.
///
/// The `u8x32` implementation in this file multiplies lane-wise and divides the widened
/// product by 255. Presumably this treats 255 as the value `1.0`; confirm against callers.
pub(crate) trait NormalizedMulExt {
    /// Multiplies `self` by `other` and returns the product rescaled to the input range.
    fn normalized_mul(self, other: Self) -> Self;
}
69
70impl<S: Simd> NormalizedMulExt for u8x32<S> {
71    #[inline(always)]
72    fn normalized_mul(self, other: Self) -> Self {
73        let divided = (self.simd.widen_u8x32(self) * other.simd.widen_u8x32(other)).div_255();
74        self.simd.narrow_u16x32(divided)
75    }
76}
77
/// Convenience queries on an encoded image.
///
/// Implemented in this file for `EncodedImage`.
pub(crate) trait EncodedImageExt {
    /// Whether the image is skewed.
    ///
    /// The `EncodedImage` implementation reports `true` when `x_advance.y` or `y_advance.x`
    /// is not nearly zero.
    fn has_skew(&self) -> bool;

    /// Whether the image should be sampled with nearest-neighbor filtering.
    ///
    /// The `EncodedImage` implementation reports `true` when the sampler quality is
    /// `ImageQuality::Low`.
    fn nearest_neighbor(&self) -> bool;
}
82
83impl EncodedImageExt for EncodedImage {
84    fn has_skew(&self) -> bool {
85        !(self.x_advance.y as f32).is_nearly_zero() || !(self.y_advance.x as f32).is_nearly_zero()
86    }
87
88    fn nearest_neighbor(&self) -> bool {
89        self.sampler.quality == ImageQuality::Low
90    }
91}
92
/// Conversion between straight and alpha-premultiplied values.
///
/// Implemented in this file for `f32x4`, where both operations work lane-wise.
pub(crate) trait Premultiply {
    /// Scales `self` by `alphas` (the `f32x4` implementation is a plain multiplication).
    fn premultiply(self, alphas: Self) -> Self;

    /// Reverses [`Premultiply::premultiply`] by dividing `self` by `alphas`.
    ///
    /// The `f32x4` implementation yields `0.0` in every lane whose alpha equals `0.0`.
    fn unpremultiply(self, alphas: Self) -> Self;
}
97
98impl<S: Simd> Premultiply for f32x4<S> {
99    #[inline(always)]
100    fn premultiply(self, alphas: Self) -> Self {
101        self * alphas
102    }
103
104    #[inline(always)]
105    fn unpremultiply(self, alphas: Self) -> Self {
106        let zero = Self::splat(alphas.simd, 0.0);
107        let divided = self / alphas;
108
109        self.simd
110            .select_f32x4(self.simd.simd_eq_f32x4(alphas, zero), zero, divided)
111    }
112}