// vello_cpu/dispatch/single_threaded.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::RenderMode;
5use crate::dispatch::Dispatcher;
6use crate::fine::{Fine, FineKernel};
7use crate::kurbo::{Affine, BezPath, Stroke};
8use crate::layer_manager::LayerManager;
9use crate::peniko::{BlendMode, Fill};
10use crate::region::Regions;
11use vello_common::clip::ClipContext;
12use vello_common::coarse::{Cmd, LayerKind, MODE_CPU, Wide, WideTilesBbox};
13use vello_common::color::palette::css::TRANSPARENT;
14use vello_common::encode::EncodedPaint;
15use vello_common::fearless_simd::{Level, Simd};
16use vello_common::filter_effects::Filter;
17use vello_common::mask::Mask;
18use vello_common::paint::{Paint, PremulColor};
19use vello_common::pixmap::Pixmap;
20use vello_common::render_graph::{RenderGraph, RenderNodeKind};
21use vello_common::strip::Strip;
22use vello_common::strip_generator::{StripGenerator, StripStorage};
23
/// Single-threaded implementation of the rendering dispatcher.
///
/// This dispatcher handles the entire rendering pipeline on a single thread,
/// including path rasterization, layer composition, and filter effects.
/// It maintains the coarse tile grid (`Wide`), strip generation for paths,
/// and the render graph for managing layer dependencies and filter effects.
#[derive(Debug)]
pub(crate) struct SingleThreadedDispatcher {
    /// Coarse tile grid containing rendering commands for each wide tile.
    wide: Wide,
    /// Clip context for managing non-isolated clipping.
    clip_context: ClipContext,
    /// Generator for converting paths into coverage strips.
    strip_generator: StripGenerator,
    /// Storage for alpha coverage data from strip generation.
    strip_storage: StripStorage,
    /// SIMD level for fearless SIMD dispatch.
    level: Level,
    /// Counter for generating unique layer IDs.
    ///
    /// Layer ID 0 is reserved for the root layer; `push_layer` pre-increments
    /// this counter, so user-pushed layers receive IDs starting at 1.
    layer_id_next: u32,
    /// Dependency graph tracking layer relationships and filter effects.
    render_graph: RenderGraph,
}
47
48impl SingleThreadedDispatcher {
49    /// Creates a new single-threaded dispatcher for the given dimensions.
50    ///
51    /// # Arguments
52    /// * `width` - Width of the rendering surface in pixels.
53    /// * `height` - Height of the rendering surface in pixels.
54    /// * `level` - SIMD level to use for rasterization.
55    ///
56    /// # Notes
57    /// The root layer (`layer_id` 0) is created immediately and must be node 0
58    /// in the render graph for proper rendering order.
59    pub(crate) fn new(width: u16, height: u16, level: Level) -> Self {
60        let wide = Wide::<MODE_CPU>::new(width, height);
61        let strip_generator = StripGenerator::new(width, height, level);
62        let clip_context = ClipContext::new();
63        let strip_storage = StripStorage::default();
64        let mut render_graph = RenderGraph::new();
65
66        // Create root node (layer_id 0) as the first node (will be node 0).
67        // This ensures the root layer is always rendered last in the execution order.
68        let wtile_bbox = WideTilesBbox::new([0, 0, wide.width_tiles(), wide.height_tiles()]);
69        let root_node = render_graph.add_node(RenderNodeKind::RootLayer {
70            layer_id: 0,
71            wtile_bbox,
72        });
73        assert_eq!(root_node, 0, "Root node must be node 0");
74
75        Self {
76            wide,
77            clip_context,
78            strip_generator,
79            strip_storage,
80            level,
81            layer_id_next: 0,
82            render_graph,
83        }
84    }
85
    /// Rasterizes the scene using f32 precision (high quality).
    ///
    /// This dispatches to the appropriate SIMD implementation based on the
    /// configured level, using f32 for intermediate calculations.
    #[cfg(feature = "f32_pipeline")]
    fn rasterize_f32(
        &self,
        buffer: &mut [u8],
        width: u16,
        height: u16,
        encoded_paints: &[EncodedPaint],
    ) {
        use crate::fine::F32Kernel;
        use vello_common::fearless_simd::dispatch;
        // `dispatch!` selects the SIMD implementation matching `self.level` and
        // monomorphizes `rasterize_with` with the f32 fine kernel.
        dispatch!(self.level, simd => self.rasterize_with::<_, F32Kernel>(simd, buffer, width, height, encoded_paints));
    }
102
    /// Rasterizes the scene using u8 precision (fast).
    ///
    /// This dispatches to the appropriate SIMD implementation based on the
    /// configured level, using u8 for intermediate calculations to maximize speed.
    #[cfg(feature = "u8_pipeline")]
    fn rasterize_u8(
        &self,
        buffer: &mut [u8],
        width: u16,
        height: u16,
        encoded_paints: &[EncodedPaint],
    ) {
        use crate::fine::U8Kernel;
        use vello_common::fearless_simd::dispatch;
        // `dispatch!` selects the SIMD implementation matching `self.level` and
        // monomorphizes `rasterize_with` with the u8 fine kernel.
        dispatch!(self.level, simd => self.rasterize_with::<_, U8Kernel>(simd, buffer, width, height, encoded_paints));
    }
119
120    /// Core rasterization dispatcher that chooses between simple and filter-aware paths.
121    ///
122    /// # Type Parameters
123    /// * `S` - SIMD implementation to use.
124    /// * `F` - Fine rasterization kernel (determines precision).
125    ///
126    /// If the scene contains filter effects, uses the filter-aware path which maintains
127    /// intermediate layer buffers. Otherwise, uses the simpler direct rasterization path.
128    fn rasterize_with<S: Simd, F: FineKernel<S>>(
129        &self,
130        simd: S,
131        buffer: &mut [u8],
132        width: u16,
133        height: u16,
134        encoded_paints: &[EncodedPaint],
135    ) {
136        let mut layer_manager = LayerManager::new();
137
138        if self.has_filters() {
139            // Use filter-aware path that maintains layer buffers for filter effects.
140            self.rasterize_with_filters::<S, F>(
141                simd,
142                buffer,
143                width,
144                height,
145                encoded_paints,
146                &mut layer_manager,
147            );
148        } else {
149            // Use simple direct rasterization for scenes without filters.
150            self.rasterize_simple::<S, F>(simd, buffer, width, height, encoded_paints);
151        }
152    }
153
    /// Rasterizes a scene with filter effects using dependency-ordered execution.
    ///
    /// This processes the render graph in topological order, ensuring that filtered
    /// layers are rendered into intermediate buffers before being composed. Each
    /// filter layer is rendered to its own pixmap, the filter is applied, and then
    /// the result is stored in the layer manager for use by dependent layers.
    ///
    /// # Render Graph Execution
    /// - `FilterLayer` nodes: Render to intermediate buffer, apply filter, store result.
    /// - `RootLayer` node: Final composition to output buffer.
    fn rasterize_with_filters<S: Simd, F: FineKernel<S>>(
        &self,
        simd: S,
        buffer: &mut [u8],
        width: u16,
        height: u16,
        encoded_paints: &[EncodedPaint],
        layer_manager: &mut LayerManager,
    ) {
        let mut fine = Fine::<S, F>::new(simd);

        // Process nodes in dependency order (filtered layers before their consumers).
        for node_id in self.render_graph.execution_order() {
            let node = &self.render_graph.nodes[node_id];

            match &node.kind {
                RenderNodeKind::FilterLayer {
                    layer_id,
                    filter,
                    wtile_bbox,
                    transform,
                } => {
                    // Allocate intermediate buffer for this filtered layer.
                    // The pixmap is sized to the layer's bounding box, not the
                    // full surface.
                    let bbox_width = wtile_bbox.width_px();
                    let bbox_height = wtile_bbox.height_px();
                    let mut pixmap = Pixmap::new(bbox_width, bbox_height);
                    // TODO: Re-use this allocation by adding a .configure() or similar method
                    // to avoid allocating the internal Vec<Region> on every filtered layer.
                    let mut regions =
                        Regions::new(bbox_width, bbox_height, pixmap.data_as_u8_slice_mut());

                    // Render each tile in the layer's bounding box.
                    regions.update_regions(|region| {
                        // Convert region-local coords to global wtile coords.
                        let x = wtile_bbox.x0() + region.x;
                        let y = wtile_bbox.y0() + region.y;

                        // Filtered layers start from transparent; the scene
                        // background is only applied to the root layer below.
                        self.process_layer_tile(
                            &mut fine,
                            x,
                            y,
                            *layer_id,
                            PremulColor::from_alpha_color(TRANSPARENT),
                            layer_manager,
                            encoded_paints,
                        );

                        debug_assert_eq!(
                            fine.blend_buf.len(),
                            1,
                            "blend buffer should contain exactly one layer after tile processing"
                        );

                        fine.pack(region);
                    });

                    // Apply the filter effect to the completed layer.
                    fine.filter_layer(&mut pixmap, filter, layer_manager, *transform);

                    // Save the filtered pixmap to disk for debugging.
                    // #[cfg(all(debug_assertions, feature = "std", feature = "png"))]
                    // save_filtered_layer_debug(&pixmap, *layer_id);

                    // Store the filtered result for use by dependent layers.
                    layer_manager.register_layer(*layer_id, *wtile_bbox, pixmap);
                }
                RenderNodeKind::RootLayer {
                    layer_id,
                    wtile_bbox: _,
                } => {
                    // Final composition directly to output buffer.
                    let mut regions = Regions::new(width, height, buffer);
                    regions.update_regions(|region| {
                        // Use the background color from the wide tile.
                        let bg = self.wide.get(region.x, region.y).bg;
                        self.process_layer_tile(
                            &mut fine,
                            region.x,
                            region.y,
                            *layer_id,
                            bg,
                            layer_manager,
                            encoded_paints,
                        );

                        debug_assert_eq!(
                            fine.blend_buf.len(),
                            1,
                            "blend buffer should contain exactly one layer after tile processing"
                        );

                        fine.pack(region);
                    });
                }
            }
        }
    }
261
    /// Processes all rendering commands for a single layer within a specific tile.
    ///
    /// This handles the complex logic of composing filtered layers by:
    /// 1. Running normal rendering commands in sequence.
    /// 2. When encountering a filtered layer reference, compositing its pre-rendered
    ///    content from the layer manager.
    /// 3. Skipping the filtered layer's internal commands (already rendered separately).
    ///
    /// # Arguments
    /// * `fine` - The fine rasterizer instance.
    /// * `x`, `y` - Wide tile coordinates.
    /// * `layer_id` - The layer being processed.
    /// * `clear_color` - Initial color for the tile.
    /// * `layer_manager` - Storage for filtered layer buffers.
    /// * `encoded_paints` - Paint definitions for the scene.
    ///
    /// # Panics
    /// Panics (via `unwrap`) if the tile has no command range recorded for
    /// `layer_id` or for a referenced filtered child layer; both are treated
    /// as invariants established during coarse command generation.
    fn process_layer_tile<S: Simd, F: FineKernel<S>>(
        &self,
        fine: &mut Fine<S, F>,
        x: u16,
        y: u16,
        layer_id: u32,
        clear_color: PremulColor,
        layer_manager: &mut LayerManager,
        encoded_paints: &[EncodedPaint],
    ) {
        let wtile = &self.wide.get(x, y);
        fine.set_coords(x, y);
        fine.clear(clear_color);

        // Process all commands in this layer's render range.
        // Invariant: tiles within a layer's bbox must have commands for that layer.
        let ranges = wtile.layer_cmd_ranges.get(&layer_id).unwrap();

        let mut cmd_idx = ranges.render_range.start;
        while cmd_idx < ranges.render_range.end {
            let cmd: &Cmd = &wtile.cmds[cmd_idx];

            fine.run_cmd(
                cmd,
                &self.strip_storage.alphas,
                encoded_paints,
                &self.wide.attrs,
            );

            // Special handling for filtered layer composition.
            // Filtered layers have already been rendered and stored in layer_manager.
            // Here we composite them into the current buffer, with special handling for clipping.
            if let Cmd::PushBuf(LayerKind::Filtered(child_layer_id)) = cmd {
                // Invariant: PushBuf(Filtered) command must have corresponding layer_cmd_ranges entry.
                let filtered_ranges = wtile.layer_cmd_ranges.get(child_layer_id).unwrap();

                // Check what comes after the filtered layer push to determine clipping state
                match wtile.cmds.get(cmd_idx + 1) {
                    // Zero-clip region: tile is completely outside the clip path.
                    // The layer was already rendered for filtering, but we skip compositing
                    // since this tile is entirely clipped out.
                    // (PushZeroClip only appears for clipped filter layers)
                    Some(Cmd::PushZeroClip(id)) if *id == *child_layer_id => {
                        cmd_idx += 1; // Skip the PushZeroClip command
                    }

                    // Partial clip: push the clip buffer, then composite the filtered layer
                    Some(Cmd::PushBuf(LayerKind::Clip(_))) => {
                        fine.run_cmd(
                            &wtile.cmds[cmd_idx + 1],
                            &self.strip_storage.alphas,
                            encoded_paints,
                            &self.wide.attrs,
                        );
                        cmd_idx += 1;

                        // NOTE(review): `layer_tile_region_mut` returning `None` is
                        // treated as "nothing to composite for this tile" — presumably
                        // the tile lies outside the child layer's bbox; confirm.
                        if let Some(mut region) =
                            layer_manager.layer_tile_region_mut(*child_layer_id, x, y)
                        {
                            fine.unpack(&mut region);
                        }
                    }

                    // No clip or fully inside clip: composite the filtered layer directly
                    _ => {
                        if let Some(mut region) =
                            layer_manager.layer_tile_region_mut(*child_layer_id, x, y)
                        {
                            fine.unpack(&mut region);
                        }
                    }
                }

                // Skip past the filtered layer's internal commands, as they were already
                // rendered when the FilterLayer node was processed earlier.
                // The `.max(cmd_idx + 1)` guarantees forward progress even if the
                // recorded range end does not advance past the current command.
                cmd_idx = filtered_ranges.render_range.end.max(cmd_idx + 1);
            } else {
                cmd_idx += 1;
            }
        }
    }
358
359    /// Simple rasterization path for scenes without filter effects.
360    ///
361    /// This directly processes each tile's commands without maintaining intermediate
362    /// layer buffers. All rendering happens in a single pass directly to the output buffer.
363    /// This is more efficient than the filter-aware path when no filters are present.
364    fn rasterize_simple<S: Simd, F: FineKernel<S>>(
365        &self,
366        simd: S,
367        buffer: &mut [u8],
368        width: u16,
369        height: u16,
370        encoded_paints: &[EncodedPaint],
371    ) {
372        let mut buffer = Regions::new(width, height, buffer);
373        let mut fine = Fine::<S, F>::new(simd);
374
375        buffer.update_regions(|region| {
376            let x = region.x;
377            let y = region.y;
378
379            let wtile = self.wide.get(x, y);
380            fine.set_coords(x, y);
381
382            // Clear to background and process all commands in order.
383            fine.clear(wtile.bg);
384            for cmd in &wtile.cmds {
385                fine.run_cmd(
386                    cmd,
387                    &self.strip_storage.alphas,
388                    encoded_paints,
389                    &self.wide.attrs,
390                );
391            }
392
393            fine.pack(region);
394        });
395    }
396
    /// Returns true if the scene contains any filter effects.
    ///
    /// Used by `rasterize_with` to decide between the simple single-pass path
    /// and the filter-aware path with intermediate layer buffers.
    fn has_filters(&self) -> bool {
        self.render_graph.has_filters()
    }
401}
402
403impl Dispatcher for SingleThreadedDispatcher {
    /// Returns a shared reference to the coarse wide-tile grid.
    fn wide(&self) -> &Wide {
        &self.wide
    }
407
408    fn fill_path(
409        &mut self,
410        path: &BezPath,
411        fill_rule: Fill,
412        transform: Affine,
413        paint: Paint,
414        blend_mode: BlendMode,
415        aliasing_threshold: Option<u8>,
416        mask: Option<Mask>,
417        encoded_paints: &[EncodedPaint],
418    ) {
419        let wide = &mut self.wide;
420
421        // Convert path to coverage strips.
422        self.strip_generator.generate_filled_path(
423            path,
424            fill_rule,
425            transform,
426            aliasing_threshold,
427            &mut self.strip_storage,
428            self.clip_context.get(),
429        );
430
431        // Generate coarse-level commands from strips (layer_id 0 = root layer).
432        wide.generate(
433            &self.strip_storage.strips,
434            paint,
435            blend_mode,
436            0,
437            mask,
438            encoded_paints,
439        );
440    }
441
442    fn stroke_path(
443        &mut self,
444        path: &BezPath,
445        stroke: &Stroke,
446        transform: Affine,
447        paint: Paint,
448        blend_mode: BlendMode,
449        aliasing_threshold: Option<u8>,
450        mask: Option<Mask>,
451        encoded_paints: &[EncodedPaint],
452    ) {
453        let wide = &mut self.wide;
454
455        // Convert stroked path to coverage strips.
456        self.strip_generator.generate_stroked_path(
457            path,
458            stroke,
459            transform,
460            aliasing_threshold,
461            &mut self.strip_storage,
462            self.clip_context.get(),
463        );
464
465        // Generate coarse-level commands from strips (layer_id 0 = root layer).
466        wide.generate(
467            &self.strip_storage.strips,
468            paint,
469            blend_mode,
470            0,
471            mask,
472            encoded_paints,
473        );
474    }
475
476    fn push_layer(
477        &mut self,
478        clip_path: Option<&BezPath>,
479        fill_rule: Fill,
480        transform: Affine,
481        blend_mode: BlendMode,
482        opacity: f32,
483        aliasing_threshold: Option<u8>,
484        mask: Option<Mask>,
485        filter: Option<Filter>,
486    ) {
487        // Allocate a new unique layer ID.
488        self.layer_id_next += 1;
489
490        // Generate clip coverage if a clip path is provided.
491        let clip = if let Some(c) = clip_path {
492            self.strip_generator.generate_filled_path(
493                c,
494                fill_rule,
495                transform,
496                aliasing_threshold,
497                &mut self.strip_storage,
498                self.clip_context.get(),
499            );
500
501            Some(self.strip_storage.strips.as_slice())
502        } else {
503            None
504        };
505
506        // Push the layer onto the coarse tile stack and update render graph.
507        self.wide.push_layer(
508            self.layer_id_next,
509            clip,
510            blend_mode,
511            mask,
512            opacity,
513            filter,
514            transform,
515            &mut self.render_graph,
516            0,
517        );
518    }
519
    /// Closes the most recently pushed layer.
    fn pop_layer(&mut self) {
        // Pop the current layer from the coarse tile stack and record the
        // transition in the render graph.
        self.wide.pop_layer(&mut self.render_graph);
    }
524
525    fn reset(&mut self) {
526        // Clear all rendering state to prepare for a new scene.
527        self.wide.reset();
528        self.clip_context.reset();
529        self.strip_generator.reset();
530        self.strip_storage.clear();
531        self.render_graph.clear();
532        self.layer_id_next = 0;
533
534        // Recreate root node as node 0 (required for proper execution order).
535        let root_node = self.render_graph.add_node(RenderNodeKind::RootLayer {
536            layer_id: 0,
537            wtile_bbox: WideTilesBbox::new([
538                0,
539                0,
540                self.wide.width_tiles(),
541                self.wide.height_tiles(),
542            ]),
543        });
544        debug_assert_eq!(root_node, 0, "Root node must be node 0");
545
546        // Reset layer ID counter.
547        self.layer_id_next = 0;
548    }
549
    fn flush(&mut self, _encoded_paints: &[EncodedPaint]) {
        // No-op: the single-threaded dispatcher performs all work eagerly,
        // so there is no pending work queue to flush.
    }
553
    /// Rasterizes the scene into `buffer`, selecting a pipeline by feature flags.
    ///
    /// `render_mode` only has an effect when both the `u8_pipeline` and
    /// `f32_pipeline` features are enabled; otherwise the single available
    /// pipeline is used unconditionally.
    fn rasterize(
        &self,
        buffer: &mut [u8],
        render_mode: RenderMode,
        width: u16,
        height: u16,
        encoded_paints: &[EncodedPaint],
    ) {
        // If only the u8 pipeline is enabled, then use it
        #[cfg(all(feature = "u8_pipeline", not(feature = "f32_pipeline")))]
        {
            let _ = render_mode;
            self.rasterize_u8(buffer, width, height, encoded_paints);
        }

        // If only the f32 pipeline is enabled, then use it
        #[cfg(all(feature = "f32_pipeline", not(feature = "u8_pipeline")))]
        {
            let _ = render_mode;
            self.rasterize_f32(buffer, width, height, encoded_paints);
        }

        // If both pipelines are enabled, select precision based on render mode parameter.
        #[cfg(all(feature = "u8_pipeline", feature = "f32_pipeline"))]
        match render_mode {
            RenderMode::OptimizeSpeed => {
                // Use u8 precision for faster rendering.
                self.rasterize_u8(buffer, width, height, encoded_paints);
            }
            RenderMode::OptimizeQuality => {
                // Use f32 precision for higher quality.
                self.rasterize_f32(buffer, width, height, encoded_paints);
            }
        }

        #[cfg(all(not(feature = "u8_pipeline"), not(feature = "f32_pipeline")))]
        {
            // This case never gets hit because there is a compile_error in the crate root.
            // Consuming the arguments here silences unused-variable warnings and keeps
            // that compile error easier to read.
            let _ = (buffer, render_mode, width, height, encoded_paints);
        }
    }
596
    /// Generates coarse commands from pre-computed strips.
    fn generate_wide_cmd(
        &mut self,
        strip_buf: &[Strip],
        paint: Paint,
        blend_mode: BlendMode,
        encoded_paints: &[EncodedPaint],
    ) {
        // Generate coarse-level commands from pre-computed strips, targeting
        // the root layer (layer_id 0) with no mask.
        self.wide
            .generate(strip_buf, paint, blend_mode, 0, None, encoded_paints);
    }
608
    /// Returns a mutable reference to the strip storage, allowing callers to
    /// write strips and alpha data directly.
    fn strip_storage_mut(&mut self) -> &mut StripStorage {
        &mut self.strip_storage
    }
612
    /// Pushes a non-isolated clip path onto the clip stack.
    fn push_clip_path(
        &mut self,
        path: &BezPath,
        fill_rule: Fill,
        transform: Affine,
        aliasing_threshold: Option<u8>,
    ) {
        // Delegate to the clip context, which uses the strip generator to
        // rasterize the clip path and tracks the active clip stack.
        self.clip_context.push_clip(
            path,
            &mut self.strip_generator,
            fill_rule,
            transform,
            aliasing_threshold,
        );
    }
628
    /// Removes the most recently pushed non-isolated clip path.
    fn pop_clip_path(&mut self) {
        self.clip_context.pop_clip();
    }
632}
633
/// Saves a filtered pixmap to disk for debugging purposes.
/// Only available in debug builds with `std` and `png` features enabled.
#[allow(
    dead_code,
    reason = "useful debug utility, can be enabled by uncommenting the call site"
)]
#[cfg(all(debug_assertions, feature = "std", feature = "png"))]
fn save_filtered_layer_debug(pixmap: &Pixmap, layer_id: u32) {
    use std::path::PathBuf;

    // Write next to the sparse test diffs so the output lands with other
    // rendering artifacts; ignore I/O failures since this is best-effort.
    let out_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../vello_sparse_tests/diffs");
    let _ = std::fs::create_dir_all(&out_dir);
    let target = out_dir.join(alloc::format!("filtered_layer_{}.png", layer_id));

    if let Ok(encoded) = pixmap.clone().into_png() {
        let _ = std::fs::write(&target, &encoded);
    }
}
652
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kurbo::Rect;
    use vello_common::color::palette::css::BLUE;
    use vello_common::kurbo::Shape;
    use vello_common::paint::PremulColor;

    /// Verifies that `reset()` properly clears all internal buffers and state.
    ///
    /// This is important to ensure that a dispatcher can be reused for multiple
    /// rendering passes without accumulating stale data from previous frames.
    #[test]
    fn buffers_cleared_on_reset() {
        let mut dispatcher = SingleThreadedDispatcher::new(100, 100, Level::new());

        // Render a simple shape to populate internal buffers.
        dispatcher.fill_path(
            &Rect::new(0.0, 0.0, 50.0, 50.0).to_path(0.1),
            Fill::NonZero,
            Affine::IDENTITY,
            Paint::Solid(PremulColor::from_alpha_color(BLUE)),
            BlendMode::default(),
            None,
            None,
            &[],
        );

        // Sanity check: there must be data to clear, otherwise the assertions
        // after `reset()` would pass vacuously.
        assert!(!dispatcher.strip_storage.alphas.is_empty());
        assert!(!dispatcher.wide.get(0, 0).cmds.is_empty());

        dispatcher.reset();

        // Verify all buffers are cleared and the layer ID counter restarts at 0.
        assert!(dispatcher.strip_storage.alphas.is_empty());
        assert!(dispatcher.wide.get(0, 0).cmds.is_empty());
        assert_eq!(dispatcher.layer_id_next, 0);
    }
}