vello_common/
strip.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Rendering strips.
5
6use crate::flatten::Line;
7use crate::peniko::Fill;
8use crate::tile::{Tile, Tiles};
9use crate::util::f32_to_u8;
10use alloc::vec::Vec;
11use fearless_simd::*;
12
13/// A strip.
14#[derive(Debug, Clone, Copy)]
15pub struct Strip {
16    /// The x coordinate of the strip, in user coordinates.
17    pub x: u16,
18    /// The y coordinate of the strip, in user coordinates.
19    pub y: u16,
20    /// The index into the alpha buffer.
21    pub alpha_idx: u32,
22    /// The winding number at the start of the strip.
23    pub winding: i32,
24}
25
26impl Strip {
27    /// Return the y coordinate of the strip, in strip units.
28    pub fn strip_y(&self) -> u16 {
29        self.y / Tile::HEIGHT
30    }
31}
32
33/// Render the tiles stored in `tiles` into the strip and alpha buffer.
34/// The strip buffer will be cleared in the beginning.
35pub fn render(
36    level: Level,
37    tiles: &Tiles,
38    strip_buf: &mut Vec<Strip>,
39    alpha_buf: &mut Vec<u8>,
40    fill_rule: Fill,
41    anti_aliasing: bool,
42    lines: &[Line],
43) {
44    render_dispatch(
45        level,
46        tiles,
47        strip_buf,
48        alpha_buf,
49        fill_rule,
50        anti_aliasing,
51        lines,
52    );
53}
54
55simd_dispatch!(render_dispatch(
56    level,
57    tiles: &Tiles,
58    strip_buf: &mut Vec<Strip>,
59    alpha_buf: &mut Vec<u8>,
60    fill_rule: Fill,
61    anti_aliasing: bool,
62    lines: &[Line],
63) = render_impl);
64
65fn render_impl<S: Simd>(
66    s: S,
67    tiles: &Tiles,
68    strip_buf: &mut Vec<Strip>,
69    alpha_buf: &mut Vec<u8>,
70    fill_rule: Fill,
71    anti_aliasing: bool,
72    lines: &[Line],
73) {
74    strip_buf.clear();
75
76    if tiles.is_empty() {
77        return;
78    }
79
80    // The accumulated tile winding delta. A line that crosses the top edge of a tile
81    // increments the delta if the line is directed upwards, and decrements it if goes
82    // downwards. Horizontal lines leave it unchanged.
83    let mut winding_delta: i32 = 0;
84
85    // The previous tile visited.
86    let mut prev_tile = *tiles.get(0);
87    // The accumulated (fractional) winding of the tile-sized location we're currently at.
88    // Note multiple tiles can be at the same location.
89    // Note that we are also implicitly assuming here that the tile height exactly fits into a
90    // SIMD vector (i.e. 128 bits).
91    let mut location_winding = [f32x4::splat(s, 0.0); Tile::WIDTH as usize];
92    // The accumulated (fractional) windings at this location's right edge. When we move to the
93    // next location, this is splatted to that location's starting winding.
94    let mut accumulated_winding = f32x4::splat(s, 0.0);
95
96    /// A special tile to keep the logic below simple.
97    const SENTINEL: Tile = Tile::new(u16::MAX, u16::MAX, 0, false);
98
99    // The strip we're building.
100    let mut strip = Strip {
101        x: prev_tile.x * Tile::WIDTH,
102        y: prev_tile.y * Tile::HEIGHT,
103        alpha_idx: alpha_buf.len() as u32,
104        winding: 0,
105    };
106
107    for (tile_idx, tile) in tiles.iter().copied().chain([SENTINEL]).enumerate() {
108        let line = lines[tile.line_idx() as usize];
109        let tile_left_x = f32::from(tile.x) * f32::from(Tile::WIDTH);
110        let tile_top_y = f32::from(tile.y) * f32::from(Tile::HEIGHT);
111        let p0_x = line.p0.x - tile_left_x;
112        let p0_y = line.p0.y - tile_top_y;
113        let p1_x = line.p1.x - tile_left_x;
114        let p1_y = line.p1.y - tile_top_y;
115
116        // Push out the winding as an alpha mask when we move to the next location (i.e., a tile
117        // without the same location).
118        if !prev_tile.same_loc(&tile) {
119            match fill_rule {
120                Fill::NonZero => {
121                    let p1 = f32x4::splat(s, 0.5);
122                    let p2 = f32x4::splat(s, 255.0);
123
124                    #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
125                    for x in 0..Tile::WIDTH as usize {
126                        let area = location_winding[x];
127                        let coverage = area.abs();
128                        let mulled = p1.madd(coverage, p2);
129                        // Note that we are not storing the location winding here but the actual
130                        // alpha value as f32, so we reuse the variable as a temporary storage.
131                        // Also note that we need the `min` here because the winding can be > 1
132                        // and thus the calculated alpha value need to be clamped to 255.
133                        location_winding[x] = mulled.min(p2);
134                    }
135                }
136                Fill::EvenOdd => {
137                    let p1 = f32x4::splat(s, 0.5);
138                    let p2 = f32x4::splat(s, -2.0);
139                    let p3 = f32x4::splat(s, 255.0);
140
141                    #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
142                    for x in 0..Tile::WIDTH as usize {
143                        let area = location_winding[x];
144                        let im1 = p1.madd(area, p1).floor();
145                        let coverage = area.madd(p2, im1).abs();
146                        let mulled = p1.madd(p3, coverage);
147                        // TODO: It is possible that, unlike for `NonZero`, we don't need the `min`
148                        // here.
149                        location_winding[x] = mulled.min(p3);
150                    }
151                }
152            };
153
154            let p1 = s.combine_f32x4(location_winding[0], location_winding[1]);
155            let p2 = s.combine_f32x4(location_winding[2], location_winding[3]);
156
157            let mut u8_vals = f32_to_u8(s.combine_f32x8(p1, p2));
158
159            if !anti_aliasing {
160                u8_vals = s.select_u8x16(
161                    u8_vals.simd_ge(u8x16::splat(s, 128)),
162                    u8x16::splat(s, 255),
163                    u8x16::splat(s, 0),
164                );
165            }
166
167            alpha_buf.extend_from_slice(&u8_vals.val);
168
169            #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
170            for x in 0..Tile::WIDTH as usize {
171                location_winding[x] = accumulated_winding;
172            }
173        }
174
175        // Push out the strip if we're moving to a next strip.
176        if !prev_tile.same_loc(&tile) && !prev_tile.prev_loc(&tile) {
177            debug_assert_eq!(
178                (prev_tile.x + 1) * Tile::WIDTH - strip.x,
179                ((alpha_buf.len() - strip.alpha_idx as usize) / usize::from(Tile::HEIGHT)) as u16,
180                "The number of columns written to the alpha buffer should equal the number of columns spanned by this strip."
181            );
182            strip_buf.push(strip);
183
184            let is_sentinel = tile_idx == tiles.len() as usize;
185            if !prev_tile.same_row(&tile) {
186                // Emit a final strip in the row if there is non-zero winding for the sparse fill,
187                // or unconditionally if we've reached the sentinel tile to end the path (the
188                // `alpha_idx` field is used for width calculations).
189                if winding_delta != 0 || is_sentinel {
190                    strip_buf.push(Strip {
191                        x: u16::MAX,
192                        y: prev_tile.y * Tile::HEIGHT,
193                        alpha_idx: alpha_buf.len() as u32,
194                        winding: winding_delta,
195                    });
196                }
197
198                winding_delta = 0;
199                accumulated_winding = f32x4::splat(s, 0.0);
200
201                #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
202                for x in 0..Tile::WIDTH as usize {
203                    location_winding[x] = accumulated_winding;
204                }
205            }
206
207            if is_sentinel {
208                break;
209            }
210
211            strip = Strip {
212                x: tile.x * Tile::WIDTH,
213                y: tile.y * Tile::HEIGHT,
214                alpha_idx: alpha_buf.len() as u32,
215                winding: winding_delta,
216            };
217            // Note: this fill is mathematically not necessary. It provides a way to reduce
218            // accumulation of float rounding errors.
219            accumulated_winding = f32x4::splat(s, winding_delta as f32);
220        }
221        prev_tile = tile;
222
223        // TODO: horizontal geometry has no impact on winding. This branch will be removed when
224        // horizontal geometry is culled at the tile-generation stage.
225        if p0_y == p1_y {
226            continue;
227        }
228
229        // Lines moving upwards (in a y-down coordinate system) add to winding; lines moving
230        // downwards subtract from winding.
231        let sign = (p0_y - p1_y).signum();
232
233        // Calculate winding / pixel area coverage.
234        //
235        // Conceptually, horizontal rays are shot from left to right. Every time the ray crosses a
236        // line that is directed upwards (decreasing `y`), the winding is incremented. Every time
237        // the ray crosses a line moving downwards (increasing `y`), the winding is decremented.
238        // The fractional area coverage of a pixel is the integral of the winding within it.
239        //
240        // Practically, to calculate this, each pixel is considered individually, and we determine
241        // whether the line moves through this pixel. The line's y-delta within this pixel is
242        // accumulated and added to the area coverage of pixels to the right. Within the pixel
243        // itself, the area to the right of the line segment forms a trapezoid (or a triangle in
244        // the degenerate case). The area of this trapezoid is added to the pixel's area coverage.
245        //
246        // For example, consider the following pixel square, with a line indicated by asterisks
247        // starting inside the pixel and crossing its bottom edge. The area covered is the
248        // trapezoid on the bottom-right enclosed by the line and the pixel square. The area is
249        // positive if the line moves down, and negative otherwise.
250        //
251        //  __________________
252        //  |                |
253        //  |         *------|
254        //  |        *       |
255        //  |       *        |
256        //  |      *         |
257        //  |     *          |
258        //  |    *           |
259        //  |___*____________|
260        //     *
261        //    *
262
263        let (line_top_y, line_top_x, line_bottom_y, line_bottom_x) = if p0_y < p1_y {
264            (p0_y, p0_x, p1_y, p1_x)
265        } else {
266            (p1_y, p1_x, p0_y, p0_x)
267        };
268
269        let (line_left_x, line_left_y, line_right_x) = if p0_x < p1_x {
270            (p0_x, p0_y, p1_x)
271        } else {
272            (p1_x, p1_y, p0_x)
273        };
274
275        let y_slope = (line_bottom_y - line_top_y) / (line_bottom_x - line_top_x);
276        let x_slope = 1. / y_slope;
277
278        winding_delta += sign as i32 * i32::from(tile.winding());
279
280        // TODO: this should be removed when out-of-viewport tiles are culled at the
281        // tile-generation stage. That requires calculating and forwarding winding to strip
282        // generation.
283        if tile.x == 0 && line_left_x < 0. {
284            let (ymin, ymax) = if line.p0.x == line.p1.x {
285                (line_top_y, line_bottom_y)
286            } else {
287                let line_viewport_left_y = (line_top_y - line_top_x * y_slope)
288                    .max(line_top_y)
289                    .min(line_bottom_y);
290
291                (
292                    f32::min(line_left_y, line_viewport_left_y),
293                    f32::max(line_left_y, line_viewport_left_y),
294                )
295            };
296
297            let ymin: f32x4<_> = ymin.simd_into(s);
298            let ymax: f32x4<_> = ymax.simd_into(s);
299
300            let px_top_y: f32x4<_> = [0.0, 1.0, 2.0, 3.0].simd_into(s);
301            let px_bottom_y = 1.0 + px_top_y;
302            let ymin = px_top_y.max(ymin);
303            let ymax = px_bottom_y.min(ymax);
304            let h = (ymax - ymin).max(0.0);
305            accumulated_winding = accumulated_winding.madd(sign, h);
306            for x_idx in 0..Tile::WIDTH {
307                location_winding[x_idx as usize] = location_winding[x_idx as usize].madd(sign, h);
308            }
309
310            if line_right_x < 0. {
311                // Early exit, as no part of the line is inside the tile.
312                continue;
313            }
314        }
315
316        let line_top_y = f32x4::splat(s, line_top_y);
317        let line_bottom_y = f32x4::splat(s, line_bottom_y);
318
319        let y_idx = f32x4::from_slice(s, &[0.0, 1.0, 2.0, 3.0]);
320        let px_top_y = y_idx;
321        let px_bottom_y = 1. + y_idx;
322
323        let ymin = line_top_y.max(px_top_y);
324        let ymax = line_bottom_y.min(px_bottom_y);
325
326        let mut acc = f32x4::splat(s, 0.0);
327
328        for x_idx in 0..Tile::WIDTH {
329            let x_idx_s = f32x4::splat(s, x_idx as f32);
330            let px_left_x = x_idx_s;
331            let px_right_x = 1.0 + x_idx_s;
332
333            // The y-coordinate of the intersections between the line and the pixel's left and
334            // right edges respectively.
335            //
336            // There is some subtlety going on here: `y_slope` will usually be finite, but will
337            // be `inf` for purely vertical lines (`p0_x == p1_x`).
338            //
339            // In the case of `inf`, the resulting slope calculation will be `-inf` or `inf`
340            // depending on whether the pixel edge is left or right of the line, respectively
341            // (from the viewport's coordinate system perspective). The `min` and `max`
342            // y-clamping logic generalizes nicely, as a pixel edge to the left of the line is
343            // clamped to `ymin`, and a pixel edge to the right is clamped to `ymax`.
344            //
345            // In the special case where a vertical line and pixel edge are at the exact same
346            // x-position (collinear), the line belongs to the pixel on whose _left_ edge it is
347            // situated. The resulting slope calculation for the edge the line is situated on
348            // will be NaN, as `0 * inf` results in NaN. This is true for both the left and
349            // right edge. In both cases, the call to `f32::max` will set this to `ymin`.
350            let line_px_left_y = line_top_y
351                .madd(px_left_x - line_top_x, y_slope)
352                .max_precise(ymin)
353                .min_precise(ymax);
354            let line_px_right_y = line_top_y
355                .madd(px_right_x - line_top_x, y_slope)
356                .max_precise(ymin)
357                .min_precise(ymax);
358
359            // `x_slope` is always finite, as horizontal geometry is elided.
360            let line_px_left_yx =
361                f32x4::splat(s, line_top_x).madd(line_px_left_y - line_top_y, x_slope);
362            let line_px_right_yx =
363                f32x4::splat(s, line_top_x).madd(line_px_right_y - line_top_y, x_slope);
364            let h = (line_px_right_y - line_px_left_y).abs();
365
366            // The trapezoidal area enclosed between the line and the right edge of the pixel
367            // square.
368            let area = 0.5 * h * (2. * px_right_x - line_px_right_yx - line_px_left_yx);
369            location_winding[x_idx as usize] =
370                location_winding[x_idx as usize] + acc.madd(sign, area);
371            acc = acc.madd(sign, h);
372        }
373
374        accumulated_winding = accumulated_winding + acc;
375    }
376}