1mod compose;
5mod gradient;
6mod image;
7
8use crate::fine::lowp::image::BilinearImagePainter;
9use crate::fine::{COLOR_COMPONENTS, Painter, SCRATCH_BUF_SIZE};
10use crate::fine::{FineKernel, highp, u8_to_f32};
11use crate::peniko::BlendMode;
12use crate::region::Region;
13use crate::util::Div255Ext;
14use bytemuck::cast_slice;
15use vello_common::coarse::WideTile;
16use vello_common::encode::{EncodedGradient, EncodedImage};
17use vello_common::fearless_simd::*;
18use vello_common::paint::PremulColor;
19use vello_common::pixmap::Pixmap;
20use vello_common::tile::Tile;
21use vello_common::util::f32_to_u8;
22
/// Fine-rasterization kernel whose numeric type is `u8`.
///
/// The type carries no state; all behavior lives in its `FineKernel` implementation.
#[derive(Clone, Copy, Debug)]
pub struct U8Kernel;
26
27impl<S: Simd> FineKernel<S> for U8Kernel {
28 type Numeric = u8;
29 type Composite = u8x32<S>;
30 type NumericVec = u8x16<S>;
31
32 #[inline]
33 fn extract_color(color: PremulColor) -> [Self::Numeric; 4] {
34 color.as_premul_rgba8().to_u8_array()
35 }
36
37 #[inline(always)]
38 fn pack(simd: S, region: &mut Region<'_>, blend_buf: &[Self::Numeric]) {
39 if region.width != WideTile::WIDTH || region.height != Tile::HEIGHT {
40 pack(region, blend_buf);
43 } else {
44 simd.vectorize(
45 #[inline(always)]
46 || {
47 pack_block(simd, region, blend_buf);
48 },
49 );
50 }
51 }
52
53 fn copy_solid(simd: S, dest: &mut [Self::Numeric], src: [Self::Numeric; 4]) {
54 simd.vectorize(
55 #[inline(always)]
56 || {
57 let color = u8x64::block_splat(
58 u32x4::splat(simd, u32::from_ne_bytes(src)).reinterpret_u8(),
59 );
60
61 for el in dest.chunks_exact_mut(64) {
62 el.copy_from_slice(&color.val);
63 }
64 },
65 );
66 }
67
68 fn gradient_painter<'a>(
69 simd: S,
70 gradient: &'a EncodedGradient,
71 t_vals: &'a [f32],
72 ) -> impl Painter + 'a {
73 simd.vectorize(
74 #[inline(always)]
75 || gradient::GradientPainter::new(simd, gradient, t_vals),
76 )
77 }
78
79 fn medium_quality_image_painter<'a>(
80 simd: S,
81 image: &'a EncodedImage,
82 pixmap: &'a Pixmap,
83 start_x: u16,
84 start_y: u16,
85 ) -> impl Painter + 'a {
86 simd.vectorize(
87 #[inline(always)]
88 || BilinearImagePainter::new(simd, image, pixmap, start_x, start_y),
89 )
90 }
91
92 fn apply_mask(
93 simd: S,
94 dest: &mut [Self::Numeric],
95 mut src: impl Iterator<Item = Self::NumericVec>,
96 ) {
97 simd.vectorize(
98 #[inline(always)]
99 || {
100 for el in dest.chunks_exact_mut(16) {
101 let loaded = u8x16::from_slice(simd, el);
102 let mulled = simd.narrow_u16x16(
103 (simd.widen_u8x16(loaded) * simd.widen_u8x16(src.next().unwrap()))
104 .div_255(),
105 );
106 el.copy_from_slice(&mulled.val);
107 }
108 },
109 );
110 }
111
112 #[inline(always)]
113 fn apply_painter<'a>(_: S, dest: &mut [Self::Numeric], mut painter: impl Painter + 'a) {
114 painter.paint_u8(dest);
115 }
116
117 #[inline(always)]
118 fn alpha_composite_solid(
119 simd: S,
120 dest: &mut [Self::Numeric],
121 src: [Self::Numeric; 4],
122 alphas: Option<&[u8]>,
123 ) {
124 if let Some(alphas) = alphas {
125 alpha_fill::alpha_composite_solid(simd, dest, src, alphas);
126 } else {
127 fill::alpha_composite_solid(simd, dest, src);
128 }
129 }
130
131 fn alpha_composite_buffer(
132 simd: S,
133 dest: &mut [Self::Numeric],
134 src: &[Self::Numeric],
135 alphas: Option<&[u8]>,
136 ) {
137 let src_iter = src.chunks_exact(32).map(|el| u8x32::from_slice(simd, el));
138
139 if let Some(alphas) = alphas {
140 alpha_fill::alpha_composite(simd, dest, src_iter, alphas);
141 } else {
142 fill::alpha_composite(simd, dest, src_iter);
143 }
144 }
145
146 fn blend(
147 simd: S,
148 dest: &mut [Self::Numeric],
149 src: impl Iterator<Item = Self::Composite>,
150 blend_mode: BlendMode,
151 alphas: Option<&[u8]>,
152 ) {
153 if let Some(alphas) = alphas {
154 alpha_fill::blend(simd, dest, src, blend_mode, alphas);
155 } else {
156 fill::blend(simd, dest, src, blend_mode);
157 }
158 }
159}
160
161mod fill {
162 use crate::fine::Splat4thExt;
163 use crate::fine::lowp::compose::ComposeExt;
164 use crate::fine::lowp::mix;
165 use crate::peniko::{BlendMode, Mix};
166 use crate::util::normalized_mul;
167 use vello_common::fearless_simd::*;
168
169 pub(super) fn blend<S: Simd, T: Iterator<Item = u8x32<S>>>(
170 simd: S,
171 dest: &mut [u8],
172 src: T,
173 blend_mode: BlendMode,
174 ) {
175 simd.vectorize(
176 #[inline(always)]
177 || {
178 #[expect(deprecated, reason = "Provided by the user, need to handle correctly.")]
179 let default_mix = matches!(blend_mode.mix, Mix::Normal | Mix::Clip);
180 let mask = u8x32::splat(simd, 255);
181 for (next_dest, next_src) in dest.chunks_exact_mut(32).zip(src) {
182 let bg_v = u8x32::from_slice(simd, next_dest);
183 let src_v = if default_mix {
184 next_src
185 } else {
186 mix(next_src, bg_v, blend_mode)
187 };
188 let res = blend_mode.compose(simd, src_v, bg_v, mask);
189 next_dest.copy_from_slice(&res.val);
190 }
191 },
192 );
193 }
194
195 pub(super) fn alpha_composite_solid<S: Simd>(s: S, dest: &mut [u8], src: [u8; 4]) {
196 s.vectorize(
197 #[inline(always)]
198 || {
199 let one_minus_alpha = 255 - u8x32::splat(s, src[3]);
200 let src_c = u32x8::splat(s, u32::from_ne_bytes(src)).reinterpret_u8();
201
202 for next_dest in dest.chunks_exact_mut(64) {
203 let bg_v = u8x64::from_slice(s, next_dest);
206 let (bg_1, bg_2) = s.split_u8x64(bg_v);
207 let res_1 = alpha_composite_inner(s, bg_1, src_c, one_minus_alpha);
208 let res_2 = alpha_composite_inner(s, bg_2, src_c, one_minus_alpha);
209 let combined = s.combine_u8x32(res_1, res_2);
210 next_dest.copy_from_slice(&combined.val);
211 }
212 },
213 );
214 }
215
216 pub(super) fn alpha_composite<S: Simd, T: Iterator<Item = u8x32<S>>>(
217 simd: S,
218 dest: &mut [u8],
219 src: T,
220 ) {
221 simd.vectorize(
222 #[inline(always)]
223 || {
224 for (next_dest, next_src) in dest.chunks_exact_mut(32).zip(src) {
225 let one_minus_alpha = 255 - next_src.splat_4th();
226 let bg_v = u8x32::from_slice(simd, next_dest);
227 let res = alpha_composite_inner(simd, bg_v, next_src, one_minus_alpha);
228 next_dest.copy_from_slice(&res.val);
229 }
230 },
231 );
232 }
233
234 #[inline(always)]
235 fn alpha_composite_inner<S: Simd>(
236 s: S,
237 bg: u8x32<S>,
238 src: u8x32<S>,
239 one_minus_alpha: u8x32<S>,
240 ) -> u8x32<S> {
241 s.narrow_u16x32(normalized_mul(bg, one_minus_alpha)) + src
242 }
243}
244
245mod alpha_fill {
246 use crate::fine::Splat4thExt;
247 use crate::fine::lowp::compose::ComposeExt;
248 use crate::fine::lowp::{extract_masks, mix};
249 use crate::peniko::{BlendMode, Mix};
250 use crate::util::{Div255Ext, normalized_mul};
251 use vello_common::fearless_simd::*;
252
253 pub(super) fn blend<S: Simd, T: Iterator<Item = u8x32<S>>>(
254 simd: S,
255 dest: &mut [u8],
256 src: T,
257 blend_mode: BlendMode,
258 alphas: &[u8],
259 ) {
260 simd.vectorize(
261 #[inline(always)]
262 || {
263 #[expect(deprecated, reason = "Provided by the user, need to handle correctly.")]
264 let default_mix = matches!(blend_mode.mix, Mix::Normal | Mix::Clip);
265
266 for ((next_bg, next_mask), next_src) in dest
267 .chunks_exact_mut(32)
268 .zip(alphas.chunks_exact(8))
269 .zip(src)
270 {
271 let bg_v = u8x32::from_slice(simd, next_bg);
272 let src_c = if default_mix {
273 next_src
274 } else {
275 mix(next_src, bg_v, blend_mode)
276 };
277 let masks = extract_masks(simd, next_mask);
278 let res = blend_mode.compose(simd, src_c, bg_v, masks);
279
280 next_bg.copy_from_slice(&res.val);
281 }
282 },
283 );
284 }
285
286 #[inline(always)]
287 pub(super) fn alpha_composite_solid<S: Simd>(
288 s: S,
289 dest: &mut [u8],
290 src: [u8; 4],
291 alphas: &[u8],
292 ) {
293 s.vectorize(
294 #[inline(always)]
295 || {
296 let src_a = u8x32::splat(s, src[3]);
297 let src_c = u32x8::splat(s, u32::from_ne_bytes(src)).reinterpret_u8();
298 let one = u8x32::splat(s, 255);
299
300 for (next_bg, next_mask) in dest.chunks_exact_mut(32).zip(alphas.chunks_exact(8)) {
301 alpha_composite_inner(s, next_bg, next_mask, src_c, src_a, one);
302 }
303 },
304 );
305 }
306
307 #[inline(always)]
308 pub(super) fn alpha_composite<S: Simd, T: Iterator<Item = u8x32<S>>>(
309 simd: S,
310 dest: &mut [u8],
311 src: T,
312 alphas: &[u8],
313 ) {
314 simd.vectorize(
315 #[inline(always)]
316 || {
317 let one = u8x32::splat(simd, 255);
318
319 for ((next_dest, next_mask), next_src) in dest
320 .chunks_exact_mut(32)
321 .zip(alphas.chunks_exact(8))
322 .zip(src)
323 {
324 let src_a = next_src.splat_4th();
325 alpha_composite_inner(simd, next_dest, next_mask, next_src, src_a, one);
326 }
327 },
328 );
329 }
330
331 #[inline(always)]
332 fn alpha_composite_inner<S: Simd>(
333 s: S,
334 dest: &mut [u8],
335 masks: &[u8],
336 src_c: u8x32<S>,
337 src_a: u8x32<S>,
338 one: u8x32<S>,
339 ) {
340 s.vectorize(
341 #[inline(always)]
342 || {
343 let bg_v = u8x32::from_slice(s, dest);
344
345 let mask_v = extract_masks(s, masks);
346 let inv_src_a_mask_a = one - s.narrow_u16x32(normalized_mul(src_a, mask_v));
347
348 let p1 = s.widen_u8x32(bg_v) * s.widen_u8x32(inv_src_a_mask_a);
349 let p2 = s.widen_u8x32(src_c) * s.widen_u8x32(mask_v);
350 let res = s.narrow_u16x32((p1 + p2).div_255());
351
352 dest.copy_from_slice(&res.val);
353 },
354 );
355 }
356}
357
358fn mix<S: Simd>(src_c: u8x32<S>, bg_c: u8x32<S>, blend_mode: BlendMode) -> u8x32<S> {
360 let to_f32 = |val: u8x32<S>| {
361 let (a, b) = src_c.simd.split_u8x32(val);
362 let mut a = u8_to_f32(a);
363 let mut b = u8_to_f32(b);
364 a *= f32x16::splat(src_c.simd, 1.0 / 255.0);
365 b *= f32x16::splat(src_c.simd, 1.0 / 255.0);
366 (a, b)
367 };
368
369 let to_u8 = |val1: f32x16<S>, val2: f32x16<S>| {
370 let val1 =
371 f32_to_u8(f32x16::splat(val1.simd, 255.0).madd(val1, f32x16::splat(val1.simd, 0.5)));
372 let val2 =
373 f32_to_u8(f32x16::splat(val2.simd, 255.0).madd(val2, f32x16::splat(val2.simd, 0.5)));
374
375 val1.simd.combine_u8x16(val1, val2)
376 };
377
378 let (mut src_1, mut src_2) = to_f32(src_c);
379 let (bg_1, bg_2) = to_f32(bg_c);
380
381 src_1 = highp::blend::mix(src_1, bg_1, blend_mode);
382 src_2 = highp::blend::mix(src_2, bg_2, blend_mode);
383
384 to_u8(src_1, src_2)
385}
386
387#[inline(always)]
388fn extract_masks<S: Simd>(simd: S, masks: &[u8]) -> u8x32<S> {
389 let m1 =
390 u32x4::splat(simd, u32::from_ne_bytes(masks[0..4].try_into().unwrap())).reinterpret_u8();
391 let m2 =
392 u32x4::splat(simd, u32::from_ne_bytes(masks[4..8].try_into().unwrap())).reinterpret_u8();
393
394 let zipped1 = m1.zip_low(m1);
395 let zipped1 = zipped1.zip_low(zipped1);
396
397 let zipped2 = m2.zip_low(m2);
398 let zipped2 = zipped2.zip_low(zipped2);
399
400 simd.combine_u8x16(zipped1, zipped2)
401}
402
403#[inline(always)]
404fn pack(region: &mut Region<'_>, blend_buf: &[u8]) {
405 for y in 0..Tile::HEIGHT {
406 for (x, pixel) in region
407 .row_mut(y)
408 .chunks_exact_mut(COLOR_COMPONENTS)
409 .enumerate()
410 {
411 let idx = COLOR_COMPONENTS * (usize::from(Tile::HEIGHT) * x + usize::from(y));
412 pixel.copy_from_slice(&blend_buf[idx..][..COLOR_COMPONENTS]);
413 }
414 }
415}
416
417#[inline(always)]
421fn pack_block<S: Simd>(simd: S, region: &mut Region<'_>, mut buf: &[u8]) {
422 buf = &buf[..SCRATCH_BUF_SIZE];
423
424 const CHUNK_LENGTH: usize = 64;
425 const SLICE_WIDTH: usize = WideTile::WIDTH as usize * COLOR_COMPONENTS;
426
427 let region_areas = region.areas();
428 let [s1, s2, s3, s4] = region_areas;
429 let dest_slices: &mut [&mut [u8; SLICE_WIDTH]; 4] = &mut [
430 (*s1).try_into().unwrap(),
431 (*s2).try_into().unwrap(),
432 (*s3).try_into().unwrap(),
433 (*s4).try_into().unwrap(),
434 ];
435
436 for (idx, col) in buf.chunks_exact(CHUNK_LENGTH).enumerate() {
437 let dest_idx = idx * CHUNK_LENGTH / 4;
438
439 let casted: &[u32; 16] = cast_slice::<u8, u32>(col).try_into().unwrap();
440
441 let loaded = simd.load_interleaved_128_u32x16(casted).reinterpret_u8();
442 dest_slices[0][dest_idx..][..16].copy_from_slice(&loaded.val[..16]);
443 dest_slices[1][dest_idx..][..16].copy_from_slice(&loaded.val[16..32]);
444 dest_slices[2][dest_idx..][..16].copy_from_slice(&loaded.val[32..48]);
445 dest_slices[3][dest_idx..][..16].copy_from_slice(&loaded.val[48..64]);
446 }
447}