vello_cpu/fine/
mod.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4mod common;
5mod highp;
6mod lowp;
7
8use crate::peniko::{BlendMode, Compose, ImageQuality, Mix};
9use crate::region::Region;
10use alloc::vec;
11use alloc::vec::Vec;
12use core::fmt::Debug;
13use core::iter;
14use vello_common::coarse::{Cmd, WideTile};
15use vello_common::encode::{
16    EncodedBlurredRoundedRectangle, EncodedGradient, EncodedImage, EncodedKind, EncodedPaint,
17};
18use vello_common::paint::{ImageSource, Paint, PremulColor};
19use vello_common::tile::Tile;
20
21pub(crate) const COLOR_COMPONENTS: usize = 4;
22pub(crate) const TILE_HEIGHT_COMPONENTS: usize = Tile::HEIGHT as usize * COLOR_COMPONENTS;
23pub const SCRATCH_BUF_SIZE: usize =
24    WideTile::WIDTH as usize * Tile::HEIGHT as usize * COLOR_COMPONENTS;
25
26use crate::fine::common::gradient::linear::SimdLinearKind;
27use crate::fine::common::gradient::radial::SimdRadialKind;
28use crate::fine::common::gradient::sweep::SimdSweepKind;
29use crate::fine::common::gradient::{GradientPainter, calculate_t_vals};
30use crate::fine::common::image::{FilteredImagePainter, NNImagePainter, PlainNNImagePainter};
31use crate::fine::common::rounded_blurred_rect::BlurredRoundedRectFiller;
32use crate::util::{BlendModeExt, EncodedImageExt};
33pub use highp::F32Kernel;
34pub use lowp::U8Kernel;
35use vello_common::fearless_simd::{
36    Simd, SimdBase, SimdFloat, SimdInto, f32x4, f32x8, f32x16, u8x16, u8x32, u32x4, u32x8,
37};
38use vello_common::pixmap::Pixmap;
39use vello_common::simd::Splat4thExt;
40use vello_common::util::f32_to_u8;
41
42pub type ScratchBuf<F> = [F; SCRATCH_BUF_SIZE];
43
/// A scalar type that exposes its own "zero" and "one" values.
pub trait Numeric: Copy + Default + Clone + Debug + PartialEq + Send + Sync + 'static {
    /// The value representing zero.
    const ZERO: Self;
    /// The value representing one.
    const ONE: Self;
}

/// For `f32`, zero and one are the literal floating point values.
impl Numeric for f32 {
    const ZERO: f32 = 0.0;
    const ONE: f32 = 1.0;
}

/// For `u8`, "one" is the largest representable value (255).
impl Numeric for u8 {
    const ZERO: u8 = u8::MIN;
    const ONE: u8 = u8::MAX;
}
58
59pub trait NumericVec<S: Simd>: Copy + Clone + Send + Sync {
60    fn from_f32(simd: S, val: f32x16<S>) -> Self;
61    fn from_u8(simd: S, val: u8x16<S>) -> Self;
62}
63
64impl<S: Simd> NumericVec<S> for f32x16<S> {
65    #[inline(always)]
66    fn from_f32(_: S, val: Self) -> Self {
67        val
68    }
69
70    #[inline(always)]
71    fn from_u8(simd: S, val: u8x16<S>) -> Self {
72        let converted = u8_to_f32(val);
73        converted * Self::splat(simd, 1.0 / 255.0)
74    }
75}
76
77impl<S: Simd> NumericVec<S> for u8x16<S> {
78    #[inline(always)]
79    fn from_f32(simd: S, val: f32x16<S>) -> Self {
80        let v1 = f32x16::splat(simd, 255.0);
81        let v2 = f32x16::splat(simd, 0.5);
82        let mulled = val.madd(v1, v2);
83
84        f32_to_u8(mulled)
85    }
86
87    #[inline(always)]
88    fn from_u8(_: S, val: Self) -> Self {
89        val
90    }
91}
92
93#[inline(always)]
94pub(crate) fn u8_to_f32<S: Simd>(val: u8x16<S>) -> f32x16<S> {
95    let simd = val.simd;
96    let zeroes = u8x16::splat(simd, 0);
97
98    let zip1 = simd.zip_high_u8x16(val, zeroes);
99    let zip2 = simd.zip_low_u8x16(val, zeroes);
100
101    let p1 = simd.zip_low_u8x16(zip2, zeroes).reinterpret_u32().cvt_f32();
102    let p2 = simd
103        .zip_high_u8x16(zip2, zeroes)
104        .reinterpret_u32()
105        .cvt_f32();
106    let p3 = simd.zip_low_u8x16(zip1, zeroes).reinterpret_u32().cvt_f32();
107    let p4 = simd
108        .zip_high_u8x16(zip1, zeroes)
109        .reinterpret_u32()
110        .cvt_f32();
111
112    simd.combine_f32x8(simd.combine_f32x4(p1, p2), simd.combine_f32x4(p3, p4))
113}
114
115pub trait CompositeType<N: Numeric, S: Simd>: Copy + Clone + Send + Sync {
116    const LENGTH: usize;
117
118    fn from_slice(simd: S, slice: &[N]) -> Self;
119    fn from_color(simd: S, color: [N; 4]) -> Self;
120}
121
122impl<S: Simd> CompositeType<f32, S> for f32x16<S> {
123    const LENGTH: usize = 16;
124
125    #[inline(always)]
126    fn from_slice(simd: S, slice: &[f32]) -> Self {
127        <Self as SimdBase<_, _>>::from_slice(simd, slice)
128    }
129
130    #[inline(always)]
131    fn from_color(simd: S, color: [f32; 4]) -> Self {
132        Self::block_splat(f32x4::from_slice(simd, &color[..]))
133    }
134}
135
136impl<S: Simd> CompositeType<u8, S> for u8x32<S> {
137    const LENGTH: usize = 32;
138
139    #[inline(always)]
140    fn from_slice(simd: S, slice: &[u8]) -> Self {
141        <Self as SimdBase<_, _>>::from_slice(simd, slice)
142    }
143
144    #[inline(always)]
145    fn from_color(simd: S, color: [u8; 4]) -> Self {
146        u32x8::block_splat(u32x4::splat(simd, u32::from_ne_bytes(color))).reinterpret_u8()
147    }
148}
149
150/// A kernel for performing fine rasterization.
151pub trait FineKernel<S: Simd>: Send + Sync + 'static {
152    /// The basic underlying numerical type of the kernel.
153    type Numeric: Numeric;
154    /// The type that is used for blending and compositing.
155    type Composite: CompositeType<Self::Numeric, S>;
156    /// The base SIMD vector type for converting between u8 and f32.
157    type NumericVec: NumericVec<S>;
158
159    /// Extract the color from a premultiplied color.
160    fn extract_color(color: PremulColor) -> [Self::Numeric; 4];
161    /// Pack the blend buf into the given region.
162    fn pack(simd: S, region: &mut Region<'_>, blend_buf: &[Self::Numeric]);
163    /// Repeatedly copy the solid color into the target buffer.
164    fn copy_solid(simd: S, target: &mut [Self::Numeric], color: [Self::Numeric; 4]);
165    /// Return the painter used for painting gradients.
166    fn gradient_painter<'a>(
167        simd: S,
168        gradient: &'a EncodedGradient,
169        t_vals: &'a [f32],
170    ) -> impl Painter + 'a {
171        simd.vectorize(
172            #[inline(always)]
173            || GradientPainter::new(simd, gradient, false, t_vals),
174        )
175    }
176    /// Return the painter used for painting gradients, with support for masking undefined locations.
177    fn gradient_painter_with_undefined<'a>(
178        simd: S,
179        gradient: &'a EncodedGradient,
180        t_vals: &'a [f32],
181    ) -> impl Painter + 'a {
182        simd.vectorize(
183            #[inline(always)]
184            || GradientPainter::new(simd, gradient, true, t_vals),
185        )
186    }
187    /// Return the painter used for painting plain nearest-neighbor images.
188    ///
189    /// Plain nearest-neighbor images are images with the quality 'Low' and no skewing component in their
190    /// transform.
191    fn plain_nn_image_painter<'a>(
192        simd: S,
193        image: &'a EncodedImage,
194        pixmap: &'a Pixmap,
195        start_x: u16,
196        start_y: u16,
197    ) -> impl Painter + 'a {
198        simd.vectorize(
199            #[inline(always)]
200            || PlainNNImagePainter::new(simd, image, pixmap, start_x, start_y),
201        )
202    }
203    /// Return the painter used for painting plain nearest-neighbor images.
204    ///
205    /// Same as `plain_nn`, but must also support skewing transforms.
206    fn nn_image_painter<'a>(
207        simd: S,
208        image: &'a EncodedImage,
209        pixmap: &'a Pixmap,
210        start_x: u16,
211        start_y: u16,
212    ) -> impl Painter + 'a {
213        simd.vectorize(
214            #[inline(always)]
215            || NNImagePainter::new(simd, image, pixmap, start_x, start_y),
216        )
217    }
218    /// Return the painter used for painting image with `Medium` quality.
219    fn medium_quality_image_painter<'a>(
220        simd: S,
221        image: &'a EncodedImage,
222        pixmap: &'a Pixmap,
223        start_x: u16,
224        start_y: u16,
225    ) -> impl Painter + 'a {
226        simd.vectorize(
227            #[inline(always)]
228            || FilteredImagePainter::new(simd, image, pixmap, start_x, start_y),
229        )
230    }
231    /// Return the painter used for painting image with `High` quality.
232    fn high_quality_image_painter<'a>(
233        simd: S,
234        image: &'a EncodedImage,
235        pixmap: &'a Pixmap,
236        start_x: u16,
237        start_y: u16,
238    ) -> impl Painter + 'a {
239        simd.vectorize(
240            #[inline(always)]
241            || FilteredImagePainter::new(simd, image, pixmap, start_x, start_y),
242        )
243    }
244    /// Return the painter used for painting blurred rounded rectangles.
245    fn blurred_rounded_rectangle_painter(
246        simd: S,
247        rect: &EncodedBlurredRoundedRectangle,
248        start_x: u16,
249        start_y: u16,
250    ) -> impl Painter {
251        simd.vectorize(
252            #[inline(always)]
253            || BlurredRoundedRectFiller::new(simd, rect, start_x, start_y),
254        )
255    }
256    /// Apply the mask to the destination buffer.
257    fn apply_mask(simd: S, dest: &mut [Self::Numeric], src: impl Iterator<Item = Self::NumericVec>);
258    /// Apply the painter to the destination buffer.
259    fn apply_painter<'a>(simd: S, dest: &mut [Self::Numeric], painter: impl Painter + 'a);
260    /// Do basic alpha compositing with a solid color.
261    fn alpha_composite_solid(
262        simd: S,
263        target: &mut [Self::Numeric],
264        src: [Self::Numeric; 4],
265        alphas: Option<&[u8]>,
266    );
267    /// Do basic alpha compositing with the given buffer.
268    fn alpha_composite_buffer(
269        simd: S,
270        dest: &mut [Self::Numeric],
271        src: &[Self::Numeric],
272        alphas: Option<&[u8]>,
273    );
274    /// Blend the source into the destination with the given blend mode.
275    fn blend(
276        simd: S,
277        dest: &mut [Self::Numeric],
278        src: impl Iterator<Item = Self::Composite>,
279        blend_mode: BlendMode,
280        alphas: Option<&[u8]>,
281    );
282}
283
284/// An object for performing fine rasterization
285#[derive(Debug)]
286pub struct Fine<S: Simd, T: FineKernel<S>> {
287    /// The coordinates of the currently covered wide tile.
288    pub(crate) wide_coords: (u16, u16),
289    /// The stack of blend buffers.
290    pub(crate) blend_buf: Vec<ScratchBuf<T::Numeric>>,
291    /// An intermediate buffer used by shaders to store their contents.
292    pub(crate) paint_buf: ScratchBuf<T::Numeric>,
293    /// An intermediate buffer used by gradients to store the t values.
294    pub(crate) f32_buf: Vec<f32>,
295    pub(crate) simd: S,
296}
297
298impl<S: Simd, T: FineKernel<S>> Fine<S, T> {
299    pub fn new(simd: S) -> Self {
300        Self {
301            simd,
302            wide_coords: (0, 0),
303            blend_buf: vec![[T::Numeric::ZERO; SCRATCH_BUF_SIZE]],
304            f32_buf: vec![0.0; SCRATCH_BUF_SIZE / 4],
305            paint_buf: [T::Numeric::ZERO; SCRATCH_BUF_SIZE],
306        }
307    }
308
309    pub fn set_coords(&mut self, x: u16, y: u16) {
310        self.wide_coords = (x, y);
311    }
312
313    pub fn clear(&mut self, premul_color: PremulColor) {
314        let converted_color = T::extract_color(premul_color);
315        let blend_buf = self.blend_buf.last_mut().unwrap();
316
317        T::copy_solid(self.simd, blend_buf, converted_color);
318    }
319
320    pub fn pack(&self, region: &mut Region<'_>) {
321        let blend_buf = self.blend_buf.last().unwrap();
322
323        T::pack(self.simd, region, blend_buf);
324    }
325
326    pub(crate) fn run_cmd(&mut self, cmd: &Cmd, alphas: &[u8], paints: &[EncodedPaint]) {
327        match cmd {
328            Cmd::Fill(f) => {
329                self.fill(
330                    usize::from(f.x),
331                    usize::from(f.width),
332                    &f.paint,
333                    f.blend_mode
334                        .unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)),
335                    paints,
336                    None,
337                );
338            }
339            Cmd::AlphaFill(s) => {
340                self.fill(
341                    usize::from(s.x),
342                    usize::from(s.width),
343                    &s.paint,
344                    s.blend_mode
345                        .unwrap_or(BlendMode::new(Mix::Normal, Compose::SrcOver)),
346                    paints,
347                    Some(&alphas[s.alpha_idx..]),
348                );
349            }
350            Cmd::PushBuf => {
351                self.blend_buf.push([T::Numeric::ZERO; SCRATCH_BUF_SIZE]);
352            }
353            Cmd::PopBuf => {
354                self.blend_buf.pop();
355            }
356            Cmd::ClipFill(cf) => {
357                self.clip(cf.x as usize, cf.width as usize, None);
358            }
359            Cmd::ClipStrip(cs) => {
360                self.clip(
361                    cs.x as usize,
362                    cs.width as usize,
363                    Some(&alphas[cs.alpha_idx..]),
364                );
365            }
366            Cmd::Blend(b) => self.blend(*b),
367            Cmd::Mask(m) => {
368                let start_x = self.wide_coords.0 * WideTile::WIDTH;
369                let start_y = self.wide_coords.1 * Tile::HEIGHT;
370
371                let blend_buf = self.blend_buf.last_mut().unwrap();
372
373                let width = (blend_buf.len() / (Tile::HEIGHT as usize * COLOR_COMPONENTS)) as u16;
374                let y = start_y as u32 + u32x4::from_slice(self.simd, &[0, 1, 2, 3]);
375
376                let iter = (start_x..(start_x + width)).map(|x| {
377                    let x_in_range = x < m.width();
378
379                    macro_rules! sample {
380                        ($idx:expr) => {
381                            if x_in_range && (y[$idx] as u16) < m.height() {
382                                m.sample(x, y[$idx] as u16)
383                            } else {
384                                0
385                            }
386                        };
387                    }
388
389                    let s1 = sample!(0);
390                    let s2 = sample!(1);
391                    let s3 = sample!(2);
392                    let s4 = sample!(3);
393
394                    let samples = u8x16::from_slice(
395                        self.simd,
396                        &[
397                            s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3, s4, s4, s4, s4,
398                        ],
399                    );
400                    T::NumericVec::from_u8(self.simd, samples)
401                });
402
403                T::apply_mask(self.simd, blend_buf, iter);
404            }
405            Cmd::Opacity(o) => {
406                if *o != 1.0 {
407                    let blend_buf = self.blend_buf.last_mut().unwrap();
408
409                    T::apply_mask(
410                        self.simd,
411                        blend_buf,
412                        iter::repeat(T::NumericVec::from_f32(
413                            self.simd,
414                            f32x16::splat(self.simd, *o),
415                        )),
416                    );
417                }
418            }
419        }
420    }
421
422    /// Fill at a given x and with a width using the given paint.
423    // For short strip segments, benchmarks showed that not inlining leads to significantly
424    // worse performance.
425    pub fn fill(
426        &mut self,
427        x: usize,
428        width: usize,
429        fill: &Paint,
430        blend_mode: BlendMode,
431        encoded_paints: &[EncodedPaint],
432        alphas: Option<&[u8]>,
433    ) {
434        let blend_buf = &mut self.blend_buf.last_mut().unwrap()[x * TILE_HEIGHT_COMPONENTS..]
435            [..TILE_HEIGHT_COMPONENTS * width];
436        let default_blend = blend_mode.is_default();
437
438        match fill {
439            Paint::Solid(color) => {
440                let color = T::extract_color(*color);
441
442                // If color is completely opaque, we can just directly override
443                // the blend buffer.
444                if color[3] == T::Numeric::ONE && default_blend && alphas.is_none() {
445                    T::copy_solid(self.simd, blend_buf, color);
446
447                    return;
448                }
449
450                if default_blend {
451                    T::alpha_composite_solid(self.simd, blend_buf, color, alphas);
452                } else {
453                    T::blend(
454                        self.simd,
455                        blend_buf,
456                        iter::repeat(T::Composite::from_color(self.simd, color)),
457                        blend_mode,
458                        alphas,
459                    );
460                }
461            }
462            Paint::Indexed(paint) => {
463                let color_buf = &mut self.paint_buf[x * TILE_HEIGHT_COMPONENTS..]
464                    [..TILE_HEIGHT_COMPONENTS * width];
465
466                let encoded_paint = &encoded_paints[paint.index()];
467
468                let start_x = self.wide_coords.0 * WideTile::WIDTH + x as u16;
469                let start_y = self.wide_coords.1 * Tile::HEIGHT;
470
471                // We need to have this as a macro because closures cannot take generic arguments, and
472                // we would have to repeatedly provide all arguments if we made it a function.
473                macro_rules! fill_complex_paint {
474                    ($has_opacities:expr, $filler:expr) => {
475                        if $has_opacities || alphas.is_some() {
476                            T::apply_painter(self.simd, color_buf, $filler);
477
478                            if default_blend {
479                                T::alpha_composite_buffer(self.simd, blend_buf, color_buf, alphas);
480                            } else {
481                                T::blend(
482                                    self.simd,
483                                    blend_buf,
484                                    color_buf
485                                        .chunks_exact(T::Composite::LENGTH)
486                                        .map(|s| T::Composite::from_slice(self.simd, s)),
487                                    blend_mode,
488                                    alphas,
489                                );
490                            }
491                        } else {
492                            // Similarly to solid colors we can just override the previous values
493                            // if all colors in the gradient are fully opaque.
494                            T::apply_painter(self.simd, blend_buf, $filler);
495                        }
496                    };
497                }
498
499                match encoded_paint {
500                    EncodedPaint::BlurredRoundedRect(b) => {
501                        fill_complex_paint!(
502                            true,
503                            T::blurred_rounded_rectangle_painter(self.simd, b, start_x, start_y)
504                        );
505                    }
506                    EncodedPaint::Gradient(g) => {
507                        // Note that we are calculating the t values first, store them in a separate
508                        // buffer and then pass that buffer to the iterator instead of calculating
509                        // the t values on the fly in the iterator. The latter would be faster, but
510                        // it would probably increase code size a lot, because the functions for
511                        // position calculation need to be inlined for good performance.
512                        let f32_buf = &mut self.f32_buf[..width * Tile::HEIGHT as usize];
513
514                        match &g.kind {
515                            EncodedKind::Linear(l) => {
516                                calculate_t_vals(
517                                    self.simd,
518                                    SimdLinearKind::new(self.simd, *l),
519                                    f32_buf,
520                                    g,
521                                    start_x,
522                                    start_y,
523                                );
524
525                                fill_complex_paint!(
526                                    g.has_opacities,
527                                    T::gradient_painter(self.simd, g, f32_buf)
528                                );
529                            }
530                            EncodedKind::Sweep(s) => {
531                                calculate_t_vals(
532                                    self.simd,
533                                    SimdSweepKind::new(self.simd, s),
534                                    f32_buf,
535                                    g,
536                                    start_x,
537                                    start_y,
538                                );
539
540                                fill_complex_paint!(
541                                    g.has_opacities,
542                                    T::gradient_painter(self.simd, g, f32_buf)
543                                );
544                            }
545                            EncodedKind::Radial(r) => {
546                                calculate_t_vals(
547                                    self.simd,
548                                    SimdRadialKind::new(self.simd, r),
549                                    f32_buf,
550                                    g,
551                                    start_x,
552                                    start_y,
553                                );
554
555                                if r.has_undefined() {
556                                    fill_complex_paint!(
557                                        g.has_opacities,
558                                        T::gradient_painter_with_undefined(self.simd, g, f32_buf)
559                                    );
560                                } else {
561                                    fill_complex_paint!(
562                                        g.has_opacities,
563                                        T::gradient_painter(self.simd, g, f32_buf)
564                                    );
565                                }
566                            }
567                        }
568                    }
569                    EncodedPaint::Image(i) => {
570                        let ImageSource::Pixmap(pixmap) = &i.source else {
571                            panic!("vello_cpu doesn't support the opaque image source.");
572                        };
573
574                        match (i.has_skew(), i.nearest_neighbor()) {
575                            (_, false) => {
576                                if i.sampler.quality == ImageQuality::Medium {
577                                    fill_complex_paint!(
578                                        i.has_opacities,
579                                        T::medium_quality_image_painter(
580                                            self.simd, i, pixmap, start_x, start_y
581                                        )
582                                    );
583                                } else {
584                                    fill_complex_paint!(
585                                        i.has_opacities,
586                                        T::high_quality_image_painter(
587                                            self.simd, i, pixmap, start_x, start_y
588                                        )
589                                    );
590                                }
591                            }
592                            (false, true) => {
593                                fill_complex_paint!(
594                                    i.has_opacities,
595                                    T::plain_nn_image_painter(
596                                        self.simd, i, pixmap, start_x, start_y
597                                    )
598                                );
599                            }
600                            (true, true) => {
601                                fill_complex_paint!(
602                                    i.has_opacities,
603                                    T::nn_image_painter(self.simd, i, pixmap, start_x, start_y)
604                                );
605                            }
606                        }
607                    }
608                }
609            }
610        }
611    }
612
613    fn blend(&mut self, blend_mode: BlendMode) {
614        let (source_buffer, rest) = self.blend_buf.split_last_mut().unwrap();
615        let target_buffer = rest.last_mut().unwrap();
616
617        if blend_mode.is_default() {
618            T::alpha_composite_buffer(self.simd, target_buffer, source_buffer, None);
619        } else {
620            T::blend(
621                self.simd,
622                target_buffer,
623                source_buffer
624                    .chunks_exact(T::Composite::LENGTH)
625                    .map(|s| T::Composite::from_slice(self.simd, s)),
626                blend_mode,
627                None,
628            );
629        }
630    }
631
632    fn clip(&mut self, x: usize, width: usize, alphas: Option<&[u8]>) {
633        let (source_buffer, rest) = self.blend_buf.split_last_mut().unwrap();
634        let target_buffer = rest.last_mut().unwrap();
635
636        let source_buffer =
637            &mut source_buffer[x * TILE_HEIGHT_COMPONENTS..][..TILE_HEIGHT_COMPONENTS * width];
638        let target_buffer =
639            &mut target_buffer[x * TILE_HEIGHT_COMPONENTS..][..TILE_HEIGHT_COMPONENTS * width];
640
641        T::alpha_composite_buffer(self.simd, target_buffer, source_buffer, alphas);
642    }
643}
644
/// A shader that is able to render its contents into a `u8` or an `f32` buffer.
///
/// Even though the trait requires a method for both buffer types, a shader might only
/// support one of them, so care is needed when using them.
pub trait Painter {
    /// Render the contents of the shader into the given `u8` buffer.
    fn paint_u8(&mut self, buf: &mut [u8]);
    /// Render the contents of the shader into the given `f32` buffer.
    fn paint_f32(&mut self, buf: &mut [f32]);
}
652
653/// Calculate the x/y position using the x/y advances for each pixel, assuming a tile height of 4.
654pub trait PosExt<S: Simd> {
655    fn splat_pos(simd: S, pos: f32, x_advance: f32, y_advance: f32) -> Self;
656}
657
658impl<S: Simd> PosExt<S> for f32x4<S> {
659    #[inline(always)]
660    fn splat_pos(simd: S, pos: f32, _: f32, y_advance: f32) -> Self {
661        let columns: [f32; Tile::HEIGHT as usize] = [0.0, 1.0, 2.0, 3.0];
662        let column_mask: Self = columns.simd_into(simd);
663
664        column_mask.madd(Self::splat(simd, y_advance), Self::splat(simd, pos))
665    }
666}
667
668impl<S: Simd> PosExt<S> for f32x8<S> {
669    #[inline(always)]
670    fn splat_pos(simd: S, pos: f32, x_advance: f32, y_advance: f32) -> Self {
671        simd.combine_f32x4(
672            f32x4::splat_pos(simd, pos, x_advance, y_advance),
673            f32x4::splat_pos(simd, pos + x_advance, x_advance, y_advance),
674        )
675    }
676}
677
678/// The results of an f32 shader, where each channel stored separately.
679pub(crate) struct ShaderResultF32<S: Simd> {
680    pub(crate) r: f32x8<S>,
681    pub(crate) g: f32x8<S>,
682    pub(crate) b: f32x8<S>,
683    pub(crate) a: f32x8<S>,
684}
685
686impl<S: Simd> ShaderResultF32<S> {
687    /// Convert the result into two f32x16 elements, interleaved as RGBA.
688    #[inline(always)]
689    pub(crate) fn get(&self) -> (f32x16<S>, f32x16<S>) {
690        let (r_1, r_2) = self.r.simd.split_f32x8(self.r);
691        let (g_1, g_2) = self.g.simd.split_f32x8(self.g);
692        let (b_1, b_2) = self.b.simd.split_f32x8(self.b);
693        let (a_1, a_2) = self.a.simd.split_f32x8(self.a);
694
695        let first = self.r.simd.combine_f32x8(
696            self.r.simd.combine_f32x4(r_1, g_1),
697            self.r.simd.combine_f32x4(b_1, a_1),
698        );
699
700        let second = self.r.simd.combine_f32x8(
701            self.r.simd.combine_f32x4(r_2, g_2),
702            self.r.simd.combine_f32x4(b_2, a_2),
703        );
704
705        (first, second)
706    }
707}
708
mod macros {
    /// Implements `Painter` for an iterator type that yields its results as f32x16.
    ///
    /// The buffer is filled in chunks of 16 elements, pulling one vector from the iterator
    /// per chunk; the generated methods panic if the iterator ends before the buffer is
    /// full. For `paint_u8`, every vector is converted to `u8` first.
    macro_rules! f32x16_painter {
        ($($type_path:tt)+) => {
            impl<S: Simd> crate::fine::Painter for $($type_path)+ {
                fn paint_u8(&mut self, buf: &mut [u8]) {
                    use vello_common::fearless_simd::*;
                    use crate::fine::NumericVec;

                    self.simd.vectorize(#[inline(always)] || {
                        for dest in buf.chunks_exact_mut(16) {
                            let vals = self.next().unwrap();
                            let bytes = u8x16::<S>::from_f32(vals.simd, vals);
                            dest.copy_from_slice(&bytes.val);
                        }
                    })
                }

                fn paint_f32(&mut self, buf: &mut [f32]) {
                    self.simd.vectorize(#[inline(always)] || {
                        for dest in buf.chunks_exact_mut(16) {
                            let vals = self.next().unwrap();
                            dest.copy_from_slice(&vals.val);
                        }
                    })
                }
            }
        };
    }

    /// Implements `Painter` for an iterator type that yields its results as u8x16.
    ///
    /// The buffer is filled in chunks of 16 elements, pulling one vector from the iterator
    /// per chunk; the generated methods panic if the iterator ends before the buffer is
    /// full. For `paint_f32`, every vector is converted to `f32` first.
    macro_rules! u8x16_painter {
        ($($type_path:tt)+) => {
            impl<S: Simd> crate::fine::Painter for $($type_path)+ {
                fn paint_u8(&mut self, buf: &mut [u8]) {
                    self.simd.vectorize(#[inline(always)] || {
                        for dest in buf.chunks_exact_mut(16) {
                            let vals = self.next().unwrap();
                            dest.copy_from_slice(&vals.val);
                        }
                    })
                }

                fn paint_f32(&mut self, buf: &mut [f32]) {
                    use vello_common::fearless_simd::*;
                    use crate::fine::NumericVec;

                    self.simd.vectorize(#[inline(always)] || {
                        for dest in buf.chunks_exact_mut(16) {
                            let vals = self.next().unwrap();
                            let floats = f32x16::<S>::from_u8(vals.simd, vals);
                            dest.copy_from_slice(&floats.val);
                        }
                    })
                }
            }
        };
    }

    pub(crate) use f32x16_painter;
    pub(crate) use u8x16_painter;
}
vello_cpu/fine/mod.rs

vello_cpu/fine/
mod.rs