1use crate::filter::filter_highp;
17use crate::fine::FineKernel;
18use crate::fine::{COLOR_COMPONENTS, Painter};
19use crate::layer_manager::LayerManager;
20use crate::peniko::BlendMode;
21use crate::region::Region;
22use vello_common::fearless_simd::*;
23use vello_common::filter_effects::Filter;
24use vello_common::kurbo::Affine;
25use vello_common::mask::Mask;
26use vello_common::paint::PremulColor;
27use vello_common::pixmap::Pixmap;
28use vello_common::tile::Tile;
29
30pub(crate) mod blend;
31pub(crate) mod compose;
32
/// Zero-sized marker type selecting the `f32` rendering kernel.
///
/// It carries no data; all behaviour comes from its `FineKernel` implementation, which uses
/// `f32` as the per-component numeric type.
#[derive(Debug, Clone, Copy)]
pub struct F32Kernel;
36
37impl<S: Simd> FineKernel<S> for F32Kernel {
38 type Numeric = f32;
39 type Composite = f32x16<S>;
40 type NumericVec = f32x16<S>;
41
42 #[inline(always)]
44 fn extract_color(color: PremulColor) -> [Self::Numeric; 4] {
45 color.as_premul_f32().components
46 }
47
48 #[inline(always)]
53 fn pack(simd: S, region: &mut Region<'_>, blend_buf: &[Self::Numeric]) {
54 simd.vectorize(
55 #[inline(always)]
56 || {
57 for y in 0..Tile::HEIGHT {
58 for (x, pixel) in region
59 .row_mut(y)
60 .chunks_exact_mut(COLOR_COMPONENTS)
61 .enumerate()
62 {
63 let idx =
64 COLOR_COMPONENTS * (usize::from(Tile::HEIGHT) * x + usize::from(y));
65 let start = &blend_buf[idx..];
66 let converted = [
69 (start[0] * 255.0 + 0.5) as u8,
70 (start[1] * 255.0 + 0.5) as u8,
71 (start[2] * 255.0 + 0.5) as u8,
72 (start[3] * 255.0 + 0.5) as u8,
73 ];
74 pixel.copy_from_slice(&converted);
75 }
76 }
77 },
78 );
79 }
80
81 #[inline(always)]
87 fn unpack(simd: S, region: &mut Region<'_>, blend_buf: &mut [Self::Numeric]) {
88 simd.vectorize(
89 #[inline(always)]
90 || {
91 for y in 0..Tile::HEIGHT {
92 for (x, pixel) in region.row_mut(y).chunks_exact(COLOR_COMPONENTS).enumerate() {
93 let idx =
94 COLOR_COMPONENTS * (usize::from(Tile::HEIGHT) * x + usize::from(y));
95 let start = &mut blend_buf[idx..];
96 start[0] = pixel[0] as f32 / 255.0;
98 start[1] = pixel[1] as f32 / 255.0;
99 start[2] = pixel[2] as f32 / 255.0;
100 start[3] = pixel[3] as f32 / 255.0;
101 }
102 }
103 },
104 );
105 }
106
107 fn filter_layer(
111 pixmap: &mut Pixmap,
112 filter: &Filter,
113 layer_manager: &mut LayerManager,
114 transform: Affine,
115 ) {
116 filter_highp(filter, pixmap, layer_manager, transform);
117 }
118
119 #[inline(never)]
123 fn copy_solid(simd: S, dest: &mut [Self::Numeric], src: [Self::Numeric; 4]) {
124 simd.vectorize(
125 #[inline(always)]
126 || {
127 let color = f32x16::block_splat(src.simd_into(simd));
128
129 for el in dest.chunks_exact_mut(16) {
130 el.copy_from_slice(color.as_slice());
131 }
132 },
133 );
134 }
135
136 fn apply_mask(
141 simd: S,
142 dest: &mut [Self::Numeric],
143 mut src: impl Iterator<Item = Self::NumericVec>,
144 ) {
145 simd.vectorize(
146 #[inline(always)]
147 || {
148 for el in dest.chunks_exact_mut(16) {
149 let loaded = f32x16::from_slice(simd, el);
150 let mulled = loaded * src.next().unwrap();
151 el.copy_from_slice(mulled.as_slice());
152 }
153 },
154 );
155 }
156
157 #[inline(always)]
161 fn apply_painter<'a>(_: S, dest: &mut [Self::Numeric], mut painter: impl Painter + 'a) {
162 painter.paint_f32(dest);
163 }
164
165 #[inline(always)]
170 fn alpha_composite_solid(
171 simd: S,
172 dest: &mut [Self::Numeric],
173 src: [Self::Numeric; 4],
174 alphas: Option<&[u8]>,
175 ) {
176 if let Some(alphas) = alphas {
177 alpha_fill::alpha_composite_solid(
178 simd,
179 dest,
180 src,
181 bytemuck::cast_slice::<u8, [u8; 4]>(alphas).iter().copied(),
182 );
183 } else {
184 fill::alpha_composite_solid(simd, dest, src);
185 }
186 }
187
188 fn alpha_composite_buffer(
194 simd: S,
195 dest: &mut [Self::Numeric],
196 src: &[Self::Numeric],
197 alphas: Option<&[u8]>,
198 ) {
199 if let Some(alphas) = alphas {
200 alpha_fill::alpha_composite_arbitrary(
201 simd,
202 dest,
203 src.chunks_exact(16).map(|el| f32x16::from_slice(simd, el)),
204 bytemuck::cast_slice::<u8, [u8; 4]>(alphas).iter().copied(),
205 );
206 } else {
207 fill::alpha_composite_arbitrary(
208 simd,
209 dest,
210 src.chunks_exact(16).map(|el| f32x16::from_slice(simd, el)),
211 );
212 }
213 }
214
215 fn blend(
220 simd: S,
221 dest: &mut [Self::Numeric],
222 mut start_x: u16,
223 start_y: u16,
224 src: impl Iterator<Item = Self::Composite>,
225 blend_mode: BlendMode,
226 alphas: Option<&[u8]>,
227 mask: Option<&Mask>,
228 ) {
229 let alpha_iter = alphas.map(|a| bytemuck::cast_slice::<u8, [u8; 4]>(a).iter().copied());
230
231 let mask_iter = mask.map(|m| {
232 let width = m.width();
233 let height = m.height();
234
235 core::iter::from_fn(move || {
236 let samples = if start_x < width && start_y + 3 < height {
237 [
239 m.sample(start_x, start_y),
240 m.sample(start_x, start_y + 1),
241 m.sample(start_x, start_y + 2),
242 m.sample(start_x, start_y + 3),
243 ]
244 } else {
245 [
247 if start_x < width && start_y < height {
248 m.sample(start_x, start_y)
249 } else {
250 255
251 },
252 if start_x < width && start_y + 1 < height {
253 m.sample(start_x, start_y + 1)
254 } else {
255 255
256 },
257 if start_x < width && start_y + 2 < height {
258 m.sample(start_x, start_y + 2)
259 } else {
260 255
261 },
262 if start_x < width && start_y + 3 < height {
263 m.sample(start_x, start_y + 3)
264 } else {
265 255
266 },
267 ]
268 };
269
270 start_x += 1;
271
272 Some(samples)
273 })
274 });
275
276 match (alpha_iter, mask_iter) {
277 (Some(alpha_iter), Some(mut mask_iter)) => {
278 let iter = alpha_iter.map(|a1| {
279 let a2 = mask_iter.next().unwrap();
280 [
281 ((a1[0] as u16 * a2[0] as u16) / 255) as u8,
282 ((a1[1] as u16 * a2[1] as u16) / 255) as u8,
283 ((a1[2] as u16 * a2[2] as u16) / 255) as u8,
284 ((a1[3] as u16 * a2[3] as u16) / 255) as u8,
285 ]
286 });
287 alpha_fill::blend(simd, dest, src, iter, blend_mode);
288 }
289 (None, Some(mask_iter)) => alpha_fill::blend(simd, dest, src, mask_iter, blend_mode),
290 (Some(alpha_iter), None) => alpha_fill::blend(simd, dest, src, alpha_iter, blend_mode),
291 (None, None) => {
292 fill::blend(simd, dest, src, blend_mode);
293 }
294 }
295 }
296}
297
298mod fill {
299 use crate::fine::Splat4thExt;
305 use crate::fine::highp::blend;
306 use crate::fine::highp::compose::ComposeExt;
307 use crate::peniko::BlendMode;
308
309 use vello_common::fearless_simd::*;
310
311 #[inline(always)]
319 pub(super) fn alpha_composite_solid<S: Simd>(s: S, dest: &mut [f32], src: [f32; 4]) {
320 s.vectorize(
321 #[inline(always)]
322 || {
323 let one_minus_alpha = 1.0 - f32x16::block_splat(f32x4::splat(s, src[3]));
324 let src_c = f32x16::block_splat(f32x4::simd_from(src, s));
325
326 for next_dest in dest.chunks_exact_mut(16) {
327 alpha_composite_inner(s, next_dest, src_c, one_minus_alpha);
328 }
329 },
330 );
331 }
332
333 #[inline(always)]
337 pub(super) fn alpha_composite_arbitrary<S: Simd, T: Iterator<Item = f32x16<S>>>(
338 simd: S,
339 dest: &mut [f32],
340 src: T,
341 ) {
342 simd.vectorize(
343 #[inline(always)]
344 || {
345 for (next_dest, next_src) in dest.chunks_exact_mut(16).zip(src) {
346 let one_minus_alpha = 1.0 - next_src.splat_4th();
347 alpha_composite_inner(simd, next_dest, next_src, one_minus_alpha);
348 }
349 },
350 );
351 }
352
353 pub(super) fn blend<S: Simd, T: Iterator<Item = f32x16<S>>>(
355 simd: S,
356 dest: &mut [f32],
357 src: T,
358 blend_mode: BlendMode,
359 ) {
360 for (next_dest, next_src) in dest.chunks_exact_mut(16).zip(src) {
361 let bg_v = f32x16::from_slice(simd, next_dest);
362 let src_c = blend::mix(next_src, bg_v, blend_mode);
363 let res = blend_mode.compose(simd, src_c, bg_v, None);
364 next_dest.copy_from_slice(res.as_slice());
365 }
366 }
367
368 #[inline(always)]
373 fn alpha_composite_inner<S: Simd>(
374 s: S,
375 dest: &mut [f32],
376 src: f32x16<S>,
377 one_minus_alpha: f32x16<S>,
378 ) {
379 let mut bg_c = f32x16::from_slice(s, dest);
380 bg_c = one_minus_alpha.madd(bg_c, src);
381 dest.copy_from_slice(bg_c.as_slice());
382 }
383}
384
385mod alpha_fill {
386 use crate::fine::Splat4thExt;
392 use crate::fine::highp::compose::ComposeExt;
393 use crate::fine::highp::{blend, extract_masks};
394 use crate::peniko::BlendMode;
395 use vello_common::fearless_simd::*;
396
397 #[inline(always)]
401 pub(super) fn alpha_composite_solid<S: Simd>(
402 s: S,
403 dest: &mut [f32],
404 src: [f32; 4],
405 alphas: impl Iterator<Item = [u8; 4]>,
406 ) {
407 s.vectorize(
408 #[inline(always)]
409 || {
410 let src_a = f32x16::splat(s, src[3]);
411 let src_c = f32x16::block_splat(src.simd_into(s));
412 let one = f32x16::splat(s, 1.0);
413
414 for (next_dest, next_mask) in dest.chunks_exact_mut(16).zip(alphas) {
415 alpha_composite_inner(s, next_dest, &next_mask, src_c, src_a, one);
416 }
417 },
418 );
419 }
420
421 pub(super) fn alpha_composite_arbitrary<S: Simd, T: Iterator<Item = f32x16<S>>>(
425 simd: S,
426 dest: &mut [f32],
427 src: T,
428 alphas: impl Iterator<Item = [u8; 4]>,
429 ) {
430 simd.vectorize(
431 #[inline(always)]
432 || {
433 let one = f32x16::splat(simd, 1.0);
434
435 for ((next_dest, next_mask), next_src) in
436 dest.chunks_exact_mut(16).zip(alphas).zip(src)
437 {
438 let src_a = next_src.splat_4th();
439 alpha_composite_inner(simd, next_dest, &next_mask, next_src, src_a, one);
440 }
441 },
442 );
443 }
444
445 pub(super) fn blend<S: Simd, T: Iterator<Item = f32x16<S>>>(
447 simd: S,
448 dest: &mut [f32],
449 src: T,
450 alphas: impl Iterator<Item = [u8; 4]>,
451 blend_mode: BlendMode,
452 ) {
453 simd.vectorize(
454 #[inline(always)]
455 || {
456 for ((next_dest, next_mask), next_src) in
457 dest.chunks_exact_mut(16).zip(alphas).zip(src)
458 {
459 let masks = extract_masks(simd, &next_mask);
460 let bg = f32x16::from_slice(simd, next_dest);
461 let src_c = blend::mix(next_src, bg, blend_mode);
462 let res = blend_mode.compose(simd, src_c, bg, Some(masks));
463 next_dest.copy_from_slice(res.as_slice());
464 }
465 },
466 );
467 }
468
469 #[inline(always)]
475 fn alpha_composite_inner<S: Simd>(
476 s: S,
477 dest: &mut [f32],
478 masks: &[u8; 4],
479 src_c: f32x16<S>,
480 src_a: f32x16<S>,
481 one: f32x16<S>,
482 ) {
483 let bg_c = f32x16::from_slice(s, dest);
484 let mask_a = extract_masks(s, masks);
485 let inv_src_a_mask_a = src_a.madd(-mask_a, one);
487
488 let res = bg_c.madd(inv_src_a_mask_a, src_c * mask_a);
489 dest.copy_from_slice(res.as_slice());
490 }
491}
492
493#[inline(always)]
501fn extract_masks<S: Simd>(simd: S, masks: &[u8; 4]) -> f32x16<S> {
502 let mut base_mask = [
503 masks[0] as f32,
504 masks[1] as f32,
505 masks[2] as f32,
506 masks[3] as f32,
507 ]
508 .simd_into(simd);
509
510 base_mask *= f32x4::splat(simd, 1.0 / 255.0);
511
512 let res = f32x16::block_splat(base_mask);
513 let zip_low = res.zip_low(res);
514
515 zip_low.zip_low(zip_low)
516}