vello_cpu/dispatch/single_threaded.rs
1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::RenderMode;
5use crate::dispatch::Dispatcher;
6use crate::fine::{Fine, FineKernel};
7use crate::kurbo::{Affine, BezPath, Stroke};
8use crate::layer_manager::LayerManager;
9use crate::peniko::{BlendMode, Fill};
10use crate::region::Regions;
11use vello_common::clip::ClipContext;
12use vello_common::coarse::{Cmd, LayerKind, MODE_CPU, Wide, WideTilesBbox};
13use vello_common::color::palette::css::TRANSPARENT;
14use vello_common::encode::EncodedPaint;
15use vello_common::fearless_simd::{Level, Simd};
16use vello_common::filter_effects::Filter;
17use vello_common::mask::Mask;
18use vello_common::paint::{Paint, PremulColor};
19use vello_common::pixmap::Pixmap;
20use vello_common::render_graph::{RenderGraph, RenderNodeKind};
21use vello_common::strip::Strip;
22use vello_common::strip_generator::{StripGenerator, StripStorage};
23
/// Single-threaded implementation of the rendering dispatcher.
///
/// This dispatcher handles the entire rendering pipeline on a single thread,
/// including path rasterization, layer composition, and filter effects.
/// It maintains the coarse tile grid (`Wide`), strip generation for paths,
/// and the render graph for managing layer dependencies and filter effects.
#[derive(Debug)]
pub(crate) struct SingleThreadedDispatcher {
    /// Coarse tile grid containing rendering commands for each wide tile.
    wide: Wide,
    /// Clip context for managing non-isolated clipping.
    clip_context: ClipContext,
    /// Generator for converting paths into coverage strips.
    strip_generator: StripGenerator,
    /// Storage for strips and alpha coverage data produced by strip generation.
    strip_storage: StripStorage,
    /// SIMD level for fearless SIMD dispatch.
    level: Level,
    /// Counter for generating unique layer IDs.
    ///
    /// Layer id 0 is reserved for the root layer; `push_layer` pre-increments
    /// this counter, so the first user-pushed layer receives id 1.
    layer_id_next: u32,
    /// Dependency graph tracking layer relationships and filter effects.
    render_graph: RenderGraph,
}
47
48impl SingleThreadedDispatcher {
49 /// Creates a new single-threaded dispatcher for the given dimensions.
50 ///
51 /// # Arguments
52 /// * `width` - Width of the rendering surface in pixels.
53 /// * `height` - Height of the rendering surface in pixels.
54 /// * `level` - SIMD level to use for rasterization.
55 ///
56 /// # Notes
57 /// The root layer (`layer_id` 0) is created immediately and must be node 0
58 /// in the render graph for proper rendering order.
59 pub(crate) fn new(width: u16, height: u16, level: Level) -> Self {
60 let wide = Wide::<MODE_CPU>::new(width, height);
61 let strip_generator = StripGenerator::new(width, height, level);
62 let clip_context = ClipContext::new();
63 let strip_storage = StripStorage::default();
64 let mut render_graph = RenderGraph::new();
65
66 // Create root node (layer_id 0) as the first node (will be node 0).
67 // This ensures the root layer is always rendered last in the execution order.
68 let wtile_bbox = WideTilesBbox::new([0, 0, wide.width_tiles(), wide.height_tiles()]);
69 let root_node = render_graph.add_node(RenderNodeKind::RootLayer {
70 layer_id: 0,
71 wtile_bbox,
72 });
73 assert_eq!(root_node, 0, "Root node must be node 0");
74
75 Self {
76 wide,
77 clip_context,
78 strip_generator,
79 strip_storage,
80 level,
81 layer_id_next: 0,
82 render_graph,
83 }
84 }
85
86 /// Rasterizes the scene using f32 precision (high quality).
87 ///
88 /// This dispatches to the appropriate SIMD implementation based on the
89 /// configured level, using f32 for intermediate calculations.
90 #[cfg(feature = "f32_pipeline")]
91 fn rasterize_f32(
92 &self,
93 buffer: &mut [u8],
94 width: u16,
95 height: u16,
96 encoded_paints: &[EncodedPaint],
97 ) {
98 use crate::fine::F32Kernel;
99 use vello_common::fearless_simd::dispatch;
100 dispatch!(self.level, simd => self.rasterize_with::<_, F32Kernel>(simd, buffer, width, height, encoded_paints));
101 }
102
103 /// Rasterizes the scene using u8 precision (fast).
104 ///
105 /// This dispatches to the appropriate SIMD implementation based on the
106 /// configured level, using u8 for intermediate calculations to maximize speed.
107 #[cfg(feature = "u8_pipeline")]
108 fn rasterize_u8(
109 &self,
110 buffer: &mut [u8],
111 width: u16,
112 height: u16,
113 encoded_paints: &[EncodedPaint],
114 ) {
115 use crate::fine::U8Kernel;
116 use vello_common::fearless_simd::dispatch;
117 dispatch!(self.level, simd => self.rasterize_with::<_, U8Kernel>(simd, buffer, width, height, encoded_paints));
118 }
119
120 /// Core rasterization dispatcher that chooses between simple and filter-aware paths.
121 ///
122 /// # Type Parameters
123 /// * `S` - SIMD implementation to use.
124 /// * `F` - Fine rasterization kernel (determines precision).
125 ///
126 /// If the scene contains filter effects, uses the filter-aware path which maintains
127 /// intermediate layer buffers. Otherwise, uses the simpler direct rasterization path.
128 fn rasterize_with<S: Simd, F: FineKernel<S>>(
129 &self,
130 simd: S,
131 buffer: &mut [u8],
132 width: u16,
133 height: u16,
134 encoded_paints: &[EncodedPaint],
135 ) {
136 let mut layer_manager = LayerManager::new();
137
138 if self.has_filters() {
139 // Use filter-aware path that maintains layer buffers for filter effects.
140 self.rasterize_with_filters::<S, F>(
141 simd,
142 buffer,
143 width,
144 height,
145 encoded_paints,
146 &mut layer_manager,
147 );
148 } else {
149 // Use simple direct rasterization for scenes without filters.
150 self.rasterize_simple::<S, F>(simd, buffer, width, height, encoded_paints);
151 }
152 }
153
154 /// Rasterizes a scene with filter effects using dependency-ordered execution.
155 ///
156 /// This processes the render graph in topological order, ensuring that filtered
157 /// layers are rendered into intermediate buffers before being composed. Each
158 /// filter layer is rendered to its own pixmap, the filter is applied, and then
159 /// the result is stored in the layer manager for use by dependent layers.
160 ///
161 /// # Render Graph Execution
162 /// - `FilterLayer` nodes: Render to intermediate buffer, apply filter, store result.
163 /// - `RootLayer` node: Final composition to output buffer.
164 fn rasterize_with_filters<S: Simd, F: FineKernel<S>>(
165 &self,
166 simd: S,
167 buffer: &mut [u8],
168 width: u16,
169 height: u16,
170 encoded_paints: &[EncodedPaint],
171 layer_manager: &mut LayerManager,
172 ) {
173 let mut fine = Fine::<S, F>::new(simd);
174
175 // Process nodes in dependency order (filtered layers before their consumers).
176 for node_id in self.render_graph.execution_order() {
177 let node = &self.render_graph.nodes[node_id];
178
179 match &node.kind {
180 RenderNodeKind::FilterLayer {
181 layer_id,
182 filter,
183 wtile_bbox,
184 transform,
185 } => {
186 // Allocate intermediate buffer for this filtered layer.
187 let bbox_width = wtile_bbox.width_px();
188 let bbox_height = wtile_bbox.height_px();
189 let mut pixmap = Pixmap::new(bbox_width, bbox_height);
190 // TODO: Re-use this allocation by adding a .configure() or similar method
191 // to avoid allocating the internal Vec<Region> on every filtered layer.
192 let mut regions =
193 Regions::new(bbox_width, bbox_height, pixmap.data_as_u8_slice_mut());
194
195 // Render each tile in the layer's bounding box.
196 regions.update_regions(|region| {
197 // Convert region-local coords to global wtile coords.
198 let x = wtile_bbox.x0() + region.x;
199 let y = wtile_bbox.y0() + region.y;
200
201 self.process_layer_tile(
202 &mut fine,
203 x,
204 y,
205 *layer_id,
206 PremulColor::from_alpha_color(TRANSPARENT),
207 layer_manager,
208 encoded_paints,
209 );
210
211 debug_assert_eq!(
212 fine.blend_buf.len(),
213 1,
214 "blend buffer should contain exactly one layer after tile processing"
215 );
216
217 fine.pack(region);
218 });
219
220 // Apply the filter effect to the completed layer.
221 fine.filter_layer(&mut pixmap, filter, layer_manager, *transform);
222
223 // Save the filtered pixmap to disk for debugging.
224 // #[cfg(all(debug_assertions, feature = "std", feature = "png"))]
225 // save_filtered_layer_debug(&pixmap, *layer_id);
226
227 // Store the filtered result for use by dependent layers.
228 layer_manager.register_layer(*layer_id, *wtile_bbox, pixmap);
229 }
230 RenderNodeKind::RootLayer {
231 layer_id,
232 wtile_bbox: _,
233 } => {
234 // Final composition directly to output buffer.
235 let mut regions = Regions::new(width, height, buffer);
236 regions.update_regions(|region| {
237 // Use the background color from the wide tile.
238 let bg = self.wide.get(region.x, region.y).bg;
239 self.process_layer_tile(
240 &mut fine,
241 region.x,
242 region.y,
243 *layer_id,
244 bg,
245 layer_manager,
246 encoded_paints,
247 );
248
249 debug_assert_eq!(
250 fine.blend_buf.len(),
251 1,
252 "blend buffer should contain exactly one layer after tile processing"
253 );
254
255 fine.pack(region);
256 });
257 }
258 }
259 }
260 }
261
262 /// Processes all rendering commands for a single layer within a specific tile.
263 ///
264 /// This handles the complex logic of composing filtered layers by:
265 /// 1. Running normal rendering commands in sequence.
266 /// 2. When encountering a filtered layer reference, compositing its pre-rendered
267 /// content from the layer manager.
268 /// 3. Skipping the filtered layer's internal commands (already rendered separately).
269 ///
270 /// # Arguments
271 /// * `fine` - The fine rasterizer instance.
272 /// * `x`, `y` - Wide tile coordinates.
273 /// * `layer_id` - The layer being processed.
274 /// * `clear_color` - Initial color for the tile.
275 /// * `layer_manager` - Storage for filtered layer buffers.
276 /// * `encoded_paints` - Paint definitions for the scene.
277 fn process_layer_tile<S: Simd, F: FineKernel<S>>(
278 &self,
279 fine: &mut Fine<S, F>,
280 x: u16,
281 y: u16,
282 layer_id: u32,
283 clear_color: PremulColor,
284 layer_manager: &mut LayerManager,
285 encoded_paints: &[EncodedPaint],
286 ) {
287 let wtile = &self.wide.get(x, y);
288 fine.set_coords(x, y);
289 fine.clear(clear_color);
290
291 // Process all commands in this layer's render range.
292 // Invariant: tiles within a layer's bbox must have commands for that layer.
293 let ranges = wtile.layer_cmd_ranges.get(&layer_id).unwrap();
294
295 let mut cmd_idx = ranges.render_range.start;
296 while cmd_idx < ranges.render_range.end {
297 let cmd: &Cmd = &wtile.cmds[cmd_idx];
298
299 fine.run_cmd(
300 cmd,
301 &self.strip_storage.alphas,
302 encoded_paints,
303 &self.wide.attrs,
304 );
305
306 // Special handling for filtered layer composition.
307 // Filtered layers have already been rendered and stored in layer_manager.
308 // Here we composite them into the current buffer, with special handling for clipping.
309 if let Cmd::PushBuf(LayerKind::Filtered(child_layer_id)) = cmd {
310 // Invariant: PushBuf(Filtered) command must have corresponding layer_cmd_ranges entry.
311 let filtered_ranges = wtile.layer_cmd_ranges.get(child_layer_id).unwrap();
312
313 // Check what comes after the filtered layer push to determine clipping state
314 match wtile.cmds.get(cmd_idx + 1) {
315 // Zero-clip region: tile is completely outside the clip path.
316 // The layer was already rendered for filtering, but we skip compositing
317 // since this tile is entirely clipped out.
318 // (PushZeroClip only appears for clipped filter layers)
319 Some(Cmd::PushZeroClip(id)) if *id == *child_layer_id => {
320 cmd_idx += 1; // Skip the PushZeroClip command
321 }
322
323 // Partial clip: push the clip buffer, then composite the filtered layer
324 Some(Cmd::PushBuf(LayerKind::Clip(_))) => {
325 fine.run_cmd(
326 &wtile.cmds[cmd_idx + 1],
327 &self.strip_storage.alphas,
328 encoded_paints,
329 &self.wide.attrs,
330 );
331 cmd_idx += 1;
332
333 if let Some(mut region) =
334 layer_manager.layer_tile_region_mut(*child_layer_id, x, y)
335 {
336 fine.unpack(&mut region);
337 }
338 }
339
340 // No clip or fully inside clip: composite the filtered layer directly
341 _ => {
342 if let Some(mut region) =
343 layer_manager.layer_tile_region_mut(*child_layer_id, x, y)
344 {
345 fine.unpack(&mut region);
346 }
347 }
348 }
349
350 // Skip past the filtered layer's internal commands, as they were already
351 // rendered when the FilterLayer node was processed earlier.
352 cmd_idx = filtered_ranges.render_range.end.max(cmd_idx + 1);
353 } else {
354 cmd_idx += 1;
355 }
356 }
357 }
358
359 /// Simple rasterization path for scenes without filter effects.
360 ///
361 /// This directly processes each tile's commands without maintaining intermediate
362 /// layer buffers. All rendering happens in a single pass directly to the output buffer.
363 /// This is more efficient than the filter-aware path when no filters are present.
364 fn rasterize_simple<S: Simd, F: FineKernel<S>>(
365 &self,
366 simd: S,
367 buffer: &mut [u8],
368 width: u16,
369 height: u16,
370 encoded_paints: &[EncodedPaint],
371 ) {
372 let mut buffer = Regions::new(width, height, buffer);
373 let mut fine = Fine::<S, F>::new(simd);
374
375 buffer.update_regions(|region| {
376 let x = region.x;
377 let y = region.y;
378
379 let wtile = self.wide.get(x, y);
380 fine.set_coords(x, y);
381
382 // Clear to background and process all commands in order.
383 fine.clear(wtile.bg);
384 for cmd in &wtile.cmds {
385 fine.run_cmd(
386 cmd,
387 &self.strip_storage.alphas,
388 encoded_paints,
389 &self.wide.attrs,
390 );
391 }
392
393 fine.pack(region);
394 });
395 }
396
397 /// Returns true if the scene contains any filter effects.
398 fn has_filters(&self) -> bool {
399 self.render_graph.has_filters()
400 }
401}
402
403impl Dispatcher for SingleThreadedDispatcher {
404 fn wide(&self) -> &Wide {
405 &self.wide
406 }
407
408 fn fill_path(
409 &mut self,
410 path: &BezPath,
411 fill_rule: Fill,
412 transform: Affine,
413 paint: Paint,
414 blend_mode: BlendMode,
415 aliasing_threshold: Option<u8>,
416 mask: Option<Mask>,
417 encoded_paints: &[EncodedPaint],
418 ) {
419 let wide = &mut self.wide;
420
421 // Convert path to coverage strips.
422 self.strip_generator.generate_filled_path(
423 path,
424 fill_rule,
425 transform,
426 aliasing_threshold,
427 &mut self.strip_storage,
428 self.clip_context.get(),
429 );
430
431 // Generate coarse-level commands from strips (layer_id 0 = root layer).
432 wide.generate(
433 &self.strip_storage.strips,
434 paint,
435 blend_mode,
436 0,
437 mask,
438 encoded_paints,
439 );
440 }
441
442 fn stroke_path(
443 &mut self,
444 path: &BezPath,
445 stroke: &Stroke,
446 transform: Affine,
447 paint: Paint,
448 blend_mode: BlendMode,
449 aliasing_threshold: Option<u8>,
450 mask: Option<Mask>,
451 encoded_paints: &[EncodedPaint],
452 ) {
453 let wide = &mut self.wide;
454
455 // Convert stroked path to coverage strips.
456 self.strip_generator.generate_stroked_path(
457 path,
458 stroke,
459 transform,
460 aliasing_threshold,
461 &mut self.strip_storage,
462 self.clip_context.get(),
463 );
464
465 // Generate coarse-level commands from strips (layer_id 0 = root layer).
466 wide.generate(
467 &self.strip_storage.strips,
468 paint,
469 blend_mode,
470 0,
471 mask,
472 encoded_paints,
473 );
474 }
475
476 fn push_layer(
477 &mut self,
478 clip_path: Option<&BezPath>,
479 fill_rule: Fill,
480 transform: Affine,
481 blend_mode: BlendMode,
482 opacity: f32,
483 aliasing_threshold: Option<u8>,
484 mask: Option<Mask>,
485 filter: Option<Filter>,
486 ) {
487 // Allocate a new unique layer ID.
488 self.layer_id_next += 1;
489
490 // Generate clip coverage if a clip path is provided.
491 let clip = if let Some(c) = clip_path {
492 self.strip_generator.generate_filled_path(
493 c,
494 fill_rule,
495 transform,
496 aliasing_threshold,
497 &mut self.strip_storage,
498 self.clip_context.get(),
499 );
500
501 Some(self.strip_storage.strips.as_slice())
502 } else {
503 None
504 };
505
506 // Push the layer onto the coarse tile stack and update render graph.
507 self.wide.push_layer(
508 self.layer_id_next,
509 clip,
510 blend_mode,
511 mask,
512 opacity,
513 filter,
514 transform,
515 &mut self.render_graph,
516 0,
517 );
518 }
519
520 fn pop_layer(&mut self) {
521 // Pop the current layer and update render graph.
522 self.wide.pop_layer(&mut self.render_graph);
523 }
524
525 fn reset(&mut self) {
526 // Clear all rendering state to prepare for a new scene.
527 self.wide.reset();
528 self.clip_context.reset();
529 self.strip_generator.reset();
530 self.strip_storage.clear();
531 self.render_graph.clear();
532 self.layer_id_next = 0;
533
534 // Recreate root node as node 0 (required for proper execution order).
535 let root_node = self.render_graph.add_node(RenderNodeKind::RootLayer {
536 layer_id: 0,
537 wtile_bbox: WideTilesBbox::new([
538 0,
539 0,
540 self.wide.width_tiles(),
541 self.wide.height_tiles(),
542 ]),
543 });
544 debug_assert_eq!(root_node, 0, "Root node must be node 0");
545
546 // Reset layer ID counter.
547 self.layer_id_next = 0;
548 }
549
550 fn flush(&mut self, _encoded_paints: &[EncodedPaint]) {
551 // No-op for single-threaded dispatcher (no work queue to flush).
552 }
553
554 fn rasterize(
555 &self,
556 buffer: &mut [u8],
557 render_mode: RenderMode,
558 width: u16,
559 height: u16,
560 encoded_paints: &[EncodedPaint],
561 ) {
562 // If only the u8 pipeline is enabled, then use it
563 #[cfg(all(feature = "u8_pipeline", not(feature = "f32_pipeline")))]
564 {
565 let _ = render_mode;
566 self.rasterize_u8(buffer, width, height, encoded_paints);
567 }
568
569 // If only the f32 pipeline is enabled, then use it
570 #[cfg(all(feature = "f32_pipeline", not(feature = "u8_pipeline")))]
571 {
572 let _ = render_mode;
573 self.rasterize_f32(buffer, width, height, encoded_paints);
574 }
575
576 // If both pipelines are enabled, select precision based on render mode parameter.
577 #[cfg(all(feature = "u8_pipeline", feature = "f32_pipeline"))]
578 match render_mode {
579 RenderMode::OptimizeSpeed => {
580 // Use u8 precision for faster rendering.
581 self.rasterize_u8(buffer, width, height, encoded_paints);
582 }
583 RenderMode::OptimizeQuality => {
584 // Use f32 precision for higher quality.
585 self.rasterize_f32(buffer, width, height, encoded_paints);
586 }
587 }
588
589 #[cfg(all(not(feature = "u8_pipeline"), not(feature = "f32_pipeline")))]
590 {
591 // This case never gets hit because there is a compile_error in the root.
592 // But have this code disables some warnings and makes the compile error easier to read
593 let _ = (buffer, render_mode, width, height, encoded_paints);
594 }
595 }
596
597 fn generate_wide_cmd(
598 &mut self,
599 strip_buf: &[Strip],
600 paint: Paint,
601 blend_mode: BlendMode,
602 encoded_paints: &[EncodedPaint],
603 ) {
604 // Generate coarse-level commands from pre-computed strips (layer_id 0 = root layer).
605 self.wide
606 .generate(strip_buf, paint, blend_mode, 0, None, encoded_paints);
607 }
608
609 fn strip_storage_mut(&mut self) -> &mut StripStorage {
610 &mut self.strip_storage
611 }
612
613 fn push_clip_path(
614 &mut self,
615 path: &BezPath,
616 fill_rule: Fill,
617 transform: Affine,
618 aliasing_threshold: Option<u8>,
619 ) {
620 self.clip_context.push_clip(
621 path,
622 &mut self.strip_generator,
623 fill_rule,
624 transform,
625 aliasing_threshold,
626 );
627 }
628
629 fn pop_clip_path(&mut self) {
630 self.clip_context.pop_clip();
631 }
632}
633
/// Saves a filtered pixmap to disk for debugging purposes.
/// Only available in debug builds with `std` and `png` features enabled.
#[allow(
    dead_code,
    reason = "useful debug utility, can be enabled by uncommenting the call site"
)]
#[cfg(all(debug_assertions, feature = "std", feature = "png"))]
fn save_filtered_layer_debug(pixmap: &Pixmap, layer_id: u32) {
    use std::path::PathBuf;

    // Output directory shared with the sparse test suite's diff artifacts.
    let out_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../vello_sparse_tests/diffs");
    // Best-effort only: this is debug tooling, so creation failures are ignored.
    let _ = std::fs::create_dir_all(&out_dir);
    let target = out_dir.join(alloc::format!("filtered_layer_{}.png", layer_id));

    // PNG encoding consumes the pixmap, so encode a clone; write errors are ignored.
    match pixmap.clone().into_png() {
        Ok(bytes) => {
            let _ = std::fs::write(&target, &bytes);
        }
        Err(_) => {}
    }
}
652
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kurbo::Rect;
    use vello_common::color::palette::css::BLUE;
    use vello_common::kurbo::Shape;
    use vello_common::paint::PremulColor;

    /// Verifies that `reset()` properly clears all internal buffers and state.
    ///
    /// This is important to ensure that a dispatcher can be reused for multiple
    /// rendering passes without accumulating stale data from previous frames.
    #[test]
    fn buffers_cleared_on_reset() {
        // NOTE(review): `Level::new()` presumably selects a default/detected SIMD
        // level — confirm against fearless_simd docs if this test becomes flaky.
        let mut dispatcher = SingleThreadedDispatcher::new(100, 100, Level::new());

        // Render a simple shape to populate internal buffers.
        dispatcher.fill_path(
            &Rect::new(0.0, 0.0, 50.0, 50.0).to_path(0.1),
            Fill::NonZero,
            Affine::IDENTITY,
            Paint::Solid(PremulColor::from_alpha_color(BLUE)),
            BlendMode::default(),
            None,
            None,
            &[],
        );

        // Ensure there is data to clear (guards against a vacuously-passing test).
        assert!(!dispatcher.strip_storage.alphas.is_empty());
        assert!(!dispatcher.wide.get(0, 0).cmds.is_empty());

        dispatcher.reset();

        // Verify all buffers are cleared.
        assert!(dispatcher.strip_storage.alphas.is_empty());
        assert!(dispatcher.wide.get(0, 0).cmds.is_empty());
        assert_eq!(dispatcher.layer_id_next, 0);
    }
}