1use crate::fine::FineKernel;
5use crate::fine::{COLOR_COMPONENTS, Painter};
6use crate::peniko::BlendMode;
7use crate::region::Region;
8use vello_common::fearless_simd::*;
9use vello_common::paint::PremulColor;
10use vello_common::tile::Tile;
11
12pub(crate) mod blend;
13pub(crate) mod compose;
14
/// Zero-sized marker type selecting the `f32` implementation of `FineKernel`.
#[derive(Debug, Clone, Copy)]
pub struct F32Kernel;
18
19impl<S: Simd> FineKernel<S> for F32Kernel {
20 type Numeric = f32;
21 type Composite = f32x16<S>;
22 type NumericVec = f32x16<S>;
23
24 #[inline(always)]
25 fn extract_color(color: PremulColor) -> [Self::Numeric; 4] {
26 color.as_premul_f32().components
27 }
28
29 #[inline(always)]
30 fn pack(simd: S, region: &mut Region<'_>, blend_buf: &[Self::Numeric]) {
31 simd.vectorize(
32 #[inline(always)]
33 || {
34 for y in 0..Tile::HEIGHT {
35 for (x, pixel) in region
36 .row_mut(y)
37 .chunks_exact_mut(COLOR_COMPONENTS)
38 .enumerate()
39 {
40 let idx =
41 COLOR_COMPONENTS * (usize::from(Tile::HEIGHT) * x + usize::from(y));
42 let start = &blend_buf[idx..];
43 let converted = [
45 (start[0] * 255.0 + 0.5) as u8,
46 (start[1] * 255.0 + 0.5) as u8,
47 (start[2] * 255.0 + 0.5) as u8,
48 (start[3] * 255.0 + 0.5) as u8,
49 ];
50 pixel.copy_from_slice(&converted);
51 }
52 }
53 },
54 );
55 }
56
57 #[inline(never)]
59 fn copy_solid(simd: S, dest: &mut [Self::Numeric], src: [Self::Numeric; 4]) {
60 simd.vectorize(
61 #[inline(always)]
62 || {
63 let color = f32x16::block_splat(src.simd_into(simd));
64
65 for el in dest.chunks_exact_mut(16) {
66 el.copy_from_slice(&color.val);
67 }
68 },
69 );
70 }
71
72 fn apply_mask(
73 simd: S,
74 dest: &mut [Self::Numeric],
75 mut src: impl Iterator<Item = Self::NumericVec>,
76 ) {
77 simd.vectorize(
78 #[inline(always)]
79 || {
80 for el in dest.chunks_exact_mut(16) {
81 let loaded = f32x16::from_slice(simd, el);
82 let mulled = loaded * src.next().unwrap();
83 el.copy_from_slice(&mulled.val);
84 }
85 },
86 );
87 }
88
89 #[inline(always)]
90 fn apply_painter<'a>(_: S, dest: &mut [Self::Numeric], mut painter: impl Painter + 'a) {
91 painter.paint_f32(dest);
92 }
93
94 #[inline(always)]
95 fn alpha_composite_solid(
96 simd: S,
97 dest: &mut [Self::Numeric],
98 src: [Self::Numeric; 4],
99 alphas: Option<&[u8]>,
100 ) {
101 if let Some(alphas) = alphas {
102 alpha_fill::alpha_composite_solid(simd, dest, src, alphas);
103 } else {
104 fill::alpha_composite_solid(simd, dest, src);
105 }
106 }
107
108 fn alpha_composite_buffer(
109 simd: S,
110 dest: &mut [Self::Numeric],
111 src: &[Self::Numeric],
112 alphas: Option<&[u8]>,
113 ) {
114 if let Some(alphas) = alphas {
115 alpha_fill::alpha_composite_arbitrary(
116 simd,
117 dest,
118 src.chunks_exact(16).map(|el| f32x16::from_slice(simd, el)),
119 alphas,
120 );
121 } else {
122 fill::alpha_composite_arbitrary(
123 simd,
124 dest,
125 src.chunks_exact(16).map(|el| f32x16::from_slice(simd, el)),
126 );
127 }
128 }
129
130 fn blend(
131 simd: S,
132 dest: &mut [Self::Numeric],
133 src: impl Iterator<Item = Self::Composite>,
134 blend_mode: BlendMode,
135 alphas: Option<&[u8]>,
136 ) {
137 if let Some(alphas) = alphas {
138 alpha_fill::blend(simd, dest, src, alphas, blend_mode);
139 } else {
140 fill::blend(simd, dest, src, blend_mode);
141 }
142 }
143}
144
145mod fill {
146 use crate::fine::Splat4thExt;
147 use crate::fine::highp::blend;
148 use crate::fine::highp::compose::ComposeExt;
149 use crate::peniko::BlendMode;
150
151 use vello_common::fearless_simd::*;
152 #[inline(always)]
156 pub(super) fn alpha_composite_solid<S: Simd>(s: S, dest: &mut [f32], src: [f32; 4]) {
157 s.vectorize(
158 #[inline(always)]
159 || {
160 let one_minus_alpha = 1.0 - f32x16::block_splat(f32x4::splat(s, src[3]));
161 let src_c = f32x16::block_splat(f32x4::simd_from(src, s));
162
163 for next_dest in dest.chunks_exact_mut(16) {
164 alpha_composite_inner(s, next_dest, src_c, one_minus_alpha);
165 }
166 },
167 );
168 }
169
170 #[inline(always)]
171 pub(super) fn alpha_composite_arbitrary<S: Simd, T: Iterator<Item = f32x16<S>>>(
172 simd: S,
173 dest: &mut [f32],
174 src: T,
175 ) {
176 simd.vectorize(
177 #[inline(always)]
178 || {
179 for (next_dest, next_src) in dest.chunks_exact_mut(16).zip(src) {
180 let one_minus_alpha = 1.0 - next_src.splat_4th();
181 alpha_composite_inner(simd, next_dest, next_src, one_minus_alpha);
182 }
183 },
184 );
185 }
186
187 pub(super) fn blend<S: Simd, T: Iterator<Item = f32x16<S>>>(
188 simd: S,
189 dest: &mut [f32],
190 src: T,
191 blend_mode: BlendMode,
192 ) {
193 let mask = f32x16::splat(simd, 1.0);
194
195 for (next_dest, next_src) in dest.chunks_exact_mut(16).zip(src) {
196 let bg_v = f32x16::from_slice(simd, next_dest);
197 let src_c = blend::mix(next_src, bg_v, blend_mode);
198 let res = blend_mode.compose(simd, src_c, bg_v, mask);
199 next_dest.copy_from_slice(&res.val);
200 }
201 }
202
203 #[inline(always)]
204 fn alpha_composite_inner<S: Simd>(
205 s: S,
206 dest: &mut [f32],
207 src: f32x16<S>,
208 one_minus_alpha: f32x16<S>,
209 ) {
210 let mut bg_c = f32x16::from_slice(s, dest);
211 bg_c = one_minus_alpha.madd(bg_c, src);
212 dest.copy_from_slice(&bg_c.val);
213 }
214}
215
216mod alpha_fill {
217 use crate::fine::Splat4thExt;
218 use crate::fine::highp::compose::ComposeExt;
219 use crate::fine::highp::{blend, extract_masks};
220 use crate::peniko::BlendMode;
221 use vello_common::fearless_simd::*;
222
223 #[inline(always)]
224 pub(super) fn alpha_composite_solid<S: Simd>(
225 s: S,
226 dest: &mut [f32],
227 src: [f32; 4],
228 alphas: &[u8],
229 ) {
230 s.vectorize(
231 #[inline(always)]
232 || {
233 let src_a = f32x16::splat(s, src[3]);
234 let src_c = f32x16::block_splat(src.simd_into(s));
235 let one = f32x16::splat(s, 1.0);
236
237 for (next_dest, next_mask) in dest.chunks_exact_mut(16).zip(alphas.chunks_exact(4))
238 {
239 alpha_composite_inner(s, next_dest, next_mask, src_c, src_a, one);
240 }
241 },
242 );
243 }
244
245 #[inline(always)]
246 pub(super) fn alpha_composite_arbitrary<S: Simd, T: Iterator<Item = f32x16<S>>>(
247 simd: S,
248 dest: &mut [f32],
249 src: T,
250 alphas: &[u8],
251 ) {
252 simd.vectorize(
253 #[inline(always)]
254 || {
255 let one = f32x16::splat(simd, 1.0);
256
257 for ((next_dest, next_mask), next_src) in dest
258 .chunks_exact_mut(16)
259 .zip(alphas.chunks_exact(4))
260 .zip(src)
261 {
262 let src_a = next_src.splat_4th();
263 alpha_composite_inner(simd, next_dest, next_mask, next_src, src_a, one);
264 }
265 },
266 );
267 }
268
269 pub(super) fn blend<S: Simd, T: Iterator<Item = f32x16<S>>>(
270 simd: S,
271 dest: &mut [f32],
272 src: T,
273 alphas: &[u8],
274 blend_mode: BlendMode,
275 ) {
276 simd.vectorize(
277 #[inline(always)]
278 || {
279 for ((next_dest, next_mask), next_src) in dest
280 .chunks_exact_mut(16)
281 .zip(alphas.chunks_exact(4))
282 .zip(src)
283 {
284 let masks = extract_masks(simd, next_mask);
285 let bg = f32x16::from_slice(simd, next_dest);
286 let src_c = blend::mix(next_src, bg, blend_mode);
287 let res = blend_mode.compose(simd, src_c, bg, masks);
288 next_dest.copy_from_slice(&res.val);
289 }
290 },
291 );
292 }
293
294 #[inline(always)]
295 fn alpha_composite_inner<S: Simd>(
296 s: S,
297 dest: &mut [f32],
298 masks: &[u8],
299 src_c: f32x16<S>,
300 src_a: f32x16<S>,
301 one: f32x16<S>,
302 ) {
303 let bg_c = f32x16::from_slice(s, dest);
304 let mask_a = extract_masks(s, masks);
305 let inv_src_a_mask_a = src_a.madd(-mask_a, one);
307
308 let res = bg_c.madd(inv_src_a_mask_a, src_c * mask_a);
309 dest.copy_from_slice(&res.val);
310 }
311}
312
313#[inline(always)]
314fn extract_masks<S: Simd>(simd: S, masks: &[u8]) -> f32x16<S> {
315 let mut base_mask = [
316 masks[0] as f32,
317 masks[1] as f32,
318 masks[2] as f32,
319 masks[3] as f32,
320 ]
321 .simd_into(simd);
322
323 base_mask *= f32x4::splat(simd, 1.0 / 255.0);
324
325 let res = f32x16::block_splat(base_mask);
326 let zip_low = res.zip_low(res);
327
328 zip_low.zip_low(zip_low)
329}