1use crate::peniko::{BlendMode, Mix};
5use crate::util::Premultiply;
6use vello_common::fearless_simd::*;
7
8#[derive(Copy, Clone)]
9struct Channels<S: Simd> {
10 r: f32x4<S>,
11 g: f32x4<S>,
12 b: f32x4<S>,
13}
14
15impl<S: Simd> Channels<S> {
16 #[inline(always)]
17 fn unpremultiply(mut self, a: f32x4<S>) -> Self {
18 self.r = self.r.unpremultiply(a);
19 self.g = self.g.unpremultiply(a);
20 self.b = self.b.unpremultiply(a);
21
22 self
23 }
24}
25
26pub(crate) fn mix<S: Simd>(src_c: f32x16<S>, bg: f32x16<S>, blend_mode: BlendMode) -> f32x16<S> {
29 #[expect(deprecated, reason = "Provided by the user, need to handle correctly.")]
30 if matches!(blend_mode.mix, Mix::Normal | Mix::Clip) {
31 return src_c;
32 }
33 let simd = src_c.simd;
35
36 let split = |input: f32x16<S>| {
37 let mut storage = [0.0; 16];
38 simd.store_interleaved_128_f32x16(input, &mut storage);
39 let input_v = f32x16::from_slice(simd, &storage);
40
41 let p1 = simd.split_f32x16(input_v);
42 let (r, g) = simd.split_f32x8(p1.0);
43 let (b, a) = simd.split_f32x8(p1.1);
44
45 (Channels { r, g, b }, a)
46 };
47
48 let (bg_channels, bg_a) = split(bg);
49 let (src_channels, src_a) = split(src_c);
50
51 let unpremultiplied_bg = bg_channels.unpremultiply(bg_a);
52 let unpremultiplied_src = src_channels.unpremultiply(src_a);
53
54 let mut res_bg = unpremultiplied_bg;
55 let mix_src = blend_mode.mix(unpremultiplied_src, unpremultiplied_bg);
56
57 let apply_alpha = |unpremultiplied_src_channel: f32x4<S>,
58 mix_src_channel: f32x4<S>,
59 dest_channel: &mut f32x4<S>| {
60 let p1 = (1.0 - bg_a) * unpremultiplied_src_channel;
61 let p2 = bg_a * mix_src_channel;
62
63 *dest_channel = (p1 + p2).premultiply(src_a);
64 };
65
66 apply_alpha(unpremultiplied_src.r, mix_src.r, &mut res_bg.r);
67 apply_alpha(unpremultiplied_src.g, mix_src.g, &mut res_bg.g);
68 apply_alpha(unpremultiplied_src.b, mix_src.b, &mut res_bg.b);
69
70 let combined = simd.combine_f32x8(
71 simd.combine_f32x4(res_bg.r, res_bg.g),
72 simd.combine_f32x4(res_bg.b, src_a),
73 );
74
75 let mut storage = [0.0; 16];
76 simd.store_interleaved_128_f32x16(combined, &mut storage);
77 f32x16::from_slice(simd, &storage)
78}
79
80trait MixExt {
81 fn mix<S: Simd>(&self, src: Channels<S>, bg: Channels<S>) -> Channels<S>;
82}
83
84impl MixExt for BlendMode {
85 fn mix<S: Simd>(&self, src: Channels<S>, bg: Channels<S>) -> Channels<S> {
86 match self.mix {
87 #[expect(deprecated, reason = "Provided by the user, need to handle correctly.")]
88 Mix::Normal | Mix::Clip => src,
89 Mix::Multiply => Multiply::mix(src, bg),
90 Mix::Screen => Screen::mix(src, bg),
91 Mix::Overlay => Overlay::mix(src, bg),
92 Mix::Darken => Darken::mix(src, bg),
93 Mix::Lighten => Lighten::mix(src, bg),
94 Mix::ColorDodge => ColorDodge::mix(src, bg),
95 Mix::ColorBurn => ColorBurn::mix(src, bg),
96 Mix::HardLight => HardLight::mix(src, bg),
97 Mix::SoftLight => SoftLight::mix(src, bg),
98 Mix::Difference => Difference::mix(src, bg),
99 Mix::Exclusion => Exclusion::mix(src, bg),
100 Mix::Luminosity => Luminosity::mix(src, bg),
101 Mix::Color => Color::mix(src, bg),
102 Mix::Hue => Hue::mix(src, bg),
103 Mix::Saturation => Saturation::mix(src, bg),
104 }
105 }
106}
107
108impl Multiply {
109 #[inline(always)]
110 fn single<S: Simd>(src: f32x4<S>, bg: f32x4<S>) -> f32x4<S> {
111 src * bg
112 }
113}
114
115impl Screen {
116 #[inline(always)]
117 fn single<S: Simd>(src: f32x4<S>, bg: f32x4<S>) -> f32x4<S> {
118 bg + src - src * bg
119 }
120}
121
122impl HardLight {
123 fn single<S: Simd>(src: f32x4<S>, bg: f32x4<S>) -> f32x4<S> {
124 let two = f32x4::splat(src.simd, 2.0);
125
126 let mask = src.simd.simd_le_f32x4(src, f32x4::splat(src.simd, 0.5));
127 let opt1 = Multiply::single(bg, src * two);
128 let opt2 = Screen::single(bg, two * src - 1.0);
129
130 src.simd.select_f32x4(mask, opt1, opt2)
131 }
132}
133
/// Declares a unit struct `$name` with a `mix` function that applies `$calc`
/// to the red, green and blue channels independently.
///
/// `$calc` is called as `$calc(source_channel, backdrop_channel)`. The generic
/// parameter is named `S`, so closures passed in may refer to `f32x4<S>`.
macro_rules! separable_mix {
    ($name:ident, $calc:expr) => {
        pub(crate) struct $name;

        impl $name {
            #[inline(always)]
            fn mix<S: Simd>(src: Channels<S>, bg: Channels<S>) -> Channels<S> {
                Channels {
                    r: $calc(src.r, bg.r),
                    g: $calc(src.g, bg.g),
                    b: $calc(src.b, bg.b),
                }
            }
        }
    };
}
150
151separable_mix!(Multiply, |cs: f32x4<S>, cb: f32x4<S>| Multiply::single(
152 cs, cb
153));
154separable_mix!(Screen, |cs: f32x4<S>, cb: f32x4<S>| Screen::single(cs, cb));
155separable_mix!(Overlay, |cs: f32x4<S>, cb: f32x4<S>| HardLight::single(
156 cb, cs
157));
158separable_mix!(Darken, |cs: f32x4<S>, cb: f32x4<S>| cs.min(cb));
159separable_mix!(Lighten, |cs: f32x4<S>, cb: f32x4<S>| cs.max(cb));
160separable_mix!(Difference, |cs: f32x4<S>, cb: f32x4<S>| {
161 cs.simd
162 .select_f32x4(cs.simd.simd_le_f32x4(cs, cb), cb - cs, cs - cb)
163});
164separable_mix!(HardLight, |cs: f32x4<S>, cb: f32x4<S>| HardLight::single(
165 cs, cb
166));
167separable_mix!(Exclusion, |cs: f32x4<S>, cb: f32x4<S>| {
168 (cs + cb) - 2.0 * (cs * cb)
169});
170separable_mix!(SoftLight, |cs: f32x4<S>, cb: f32x4<S>| {
171 let mask_1 = cs.simd.simd_le_f32x4(cb, f32x4::splat(cs.simd, 0.25));
172
173 let d = cs
174 .simd
175 .select_f32x4(mask_1, ((16.0 * cb - 12.0) * cb + 4.0) * cb, cb.sqrt());
176
177 let mask_2 = cs.simd.simd_le_f32x4(cs, f32x4::splat(cs.simd, 0.5));
178
179 cs.simd.select_f32x4(
180 mask_2,
181 cb - (1.0 - 2.0 * cs) * cb * (1.0 - cb),
182 cb + (2.0 * cs - 1.0) * (d - cb),
183 )
184});
185separable_mix!(ColorDodge, |cs: f32x4<S>, cb: f32x4<S>| {
186 let mask_1 = cb.simd.simd_eq_f32x4(cb, f32x4::splat(cb.simd, 0.0));
187 let mask_2 = cs.simd.simd_eq_f32x4(cs, f32x4::splat(cs.simd, 1.0));
188
189 cs.simd.select_f32x4(
190 mask_1,
192 f32x4::splat(cs.simd, 0.0),
193 cs.simd.select_f32x4(
195 mask_2,
196 f32x4::splat(cs.simd, 1.0),
197 f32x4::splat(cs.simd, 1.0).min(cb / (1.0 - cs)),
199 ),
200 )
201});
202separable_mix!(ColorBurn, |cs: f32x4<S>, cb: f32x4<S>| {
203 let mask_1 = cb.simd.simd_eq_f32x4(cb, f32x4::splat(cb.simd, 1.0));
204 let mask_2 = cs.simd.simd_eq_f32x4(cs, f32x4::splat(cs.simd, 0.0));
205
206 cs.simd.select_f32x4(
207 mask_1,
209 f32x4::splat(cs.simd, 1.0),
210 cs.simd.select_f32x4(
212 mask_2,
213 f32x4::splat(cs.simd, 0.0),
214 1.0 - f32x4::splat(cs.simd, 1.0).min((1.0 - cb) / cs),
216 ),
217 )
218});
219
/// Declares a unit struct `$name` with a `mix` function that hands mutable
/// access to all source and backdrop channels to `$calc` and returns its result.
///
/// `$calc` is called as `$calc(&mut source, &mut backdrop)`. The generic
/// parameter is named `S`, so closures passed in may refer to `Channels<S>`.
macro_rules! non_separable_mix {
    ($name:ident, $calc:expr) => {
        pub(crate) struct $name;

        impl $name {
            #[inline(always)]
            fn mix<S: Simd>(mut src: Channels<S>, mut bg: Channels<S>) -> Channels<S> {
                let (source, backdrop) = (&mut src, &mut bg);

                $calc(source, backdrop)
            }
        }
    };
}
232
233non_separable_mix!(Hue, |cs: &mut Channels<S>, cb: &mut Channels<S>| {
234 set_sat(&mut cs.r, &mut cs.g, &mut cs.b, sat(cb.r, cb.g, cb.b));
235 set_lum(&mut cs.r, &mut cs.g, &mut cs.b, lum(cb.r, cb.g, cb.b));
236
237 *cs
238});
239
240non_separable_mix!(Saturation, |cs: &mut Channels<S>, cb: &mut Channels<S>| {
241 let lum = lum(cb.r, cb.g, cb.b);
242 set_sat(&mut cb.r, &mut cb.g, &mut cb.b, sat(cs.r, cs.g, cs.b));
243 set_lum(&mut cb.r, &mut cb.g, &mut cb.b, lum);
244
245 *cb
246});
247
248non_separable_mix!(Color, |cs: &mut Channels<S>, cb: &mut Channels<S>| {
249 set_lum(&mut cs.r, &mut cs.g, &mut cs.b, lum(cb.r, cb.g, cb.b));
250
251 *cs
252});
253non_separable_mix!(Luminosity, |cs: &mut Channels<S>, cb: &mut Channels<S>| {
254 set_lum(&mut cb.r, &mut cb.g, &mut cb.b, lum(cs.r, cs.g, cs.b));
255
256 *cb
257});
258
259fn lum<S: Simd>(r: f32x4<S>, g: f32x4<S>, b: f32x4<S>) -> f32x4<S> {
260 0.3 * r + 0.59 * g + 0.11 * b
261}
262
263fn sat<S: Simd>(r: f32x4<S>, g: f32x4<S>, b: f32x4<S>) -> f32x4<S> {
264 r.max(g).max(b) - r.min(g).min(b)
265}
266
267fn clip_color<S: Simd>(r: &mut f32x4<S>, g: &mut f32x4<S>, b: &mut f32x4<S>) {
268 let simd = r.simd;
269
270 let l = lum(*r, *g, *b);
271 let n = r.min(g.min(*b));
272 let x = r.max(g.max(*b));
273
274 for c in [r, g, b] {
275 *c = simd.select_f32x4(
276 simd.simd_lt_f32x4(n, f32x4::splat(simd, 0.0)),
277 l + (((*c - l) * l) / (l - n)),
278 *c,
279 );
280
281 *c = simd.select_f32x4(
282 simd.simd_gt_f32x4(x, f32x4::splat(simd, 1.0)),
283 l + (((*c - l) * (1.0 - l)) / (x - l)),
284 *c,
285 );
286 }
287}
288
289fn set_lum<S: Simd>(r: &mut f32x4<S>, g: &mut f32x4<S>, b: &mut f32x4<S>, l: f32x4<S>) {
290 let d = l - lum(*r, *g, *b);
291 *r += d;
292 *g += d;
293 *b += d;
294
295 clip_color(r, g, b);
296}
297
298fn set_sat<S: Simd>(r: &mut f32x4<S>, g: &mut f32x4<S>, b: &mut f32x4<S>, s: f32x4<S>) {
300 let simd = r.simd;
301 let zero = f32x4::splat(simd, 0.0);
302 let mn = r.min(g.min(*b));
303 let mx = r.max(g.max(*b));
304 let sat = mx - mn;
305
306 let scale = |c| simd.select_f32x4(simd.simd_eq_f32x4(sat, zero), zero, (c - mn) * s / sat);
308
309 *r = scale(*r);
310 *g = scale(*g);
311 *b = scale(*b);
312}