Skip to main content

vello_common/
strip.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Rendering strips.
5
6use crate::flatten::Line;
7use crate::peniko::Fill;
8use crate::tile::{Tile, Tiles};
9use crate::util::f32_to_u8;
10use alloc::vec::Vec;
11use fearless_simd::*;
12
13/// A strip.
14#[derive(Debug, Clone, Copy, PartialEq, Eq)]
15pub struct Strip {
16    /// The x coordinate of the strip, in user coordinates.
17    pub x: u16,
18    /// The y coordinate of the strip, in user coordinates.
19    pub y: u16,
20    /// Packed alpha index and fill gap flag.
21    ///
22    /// Bit layout (u32):
23    /// - bit 31: `fill_gap` (See `Strip::fill_gap()`).
24    /// - bits 0..=30: `alpha_idx` (See `Strip::alpha_idx()`).
25    packed_alpha_idx_fill_gap: u32,
26}
27
28impl Strip {
29    /// The bit mask for `fill_gap` packed into `packed_alpha_idx_fill_gap`.
30    const FILL_GAP_MASK: u32 = 1 << 31;
31
32    /// Creates a new strip.
33    pub fn new(x: u16, y: u16, alpha_idx: u32, fill_gap: bool) -> Self {
34        // Ensure `alpha_idx` does not collide with the fill flag bit.
35        assert!(
36            alpha_idx & Self::FILL_GAP_MASK == 0,
37            "`alpha_idx` too large"
38        );
39        let fill_gap = u32::from(fill_gap) << 31;
40        Self {
41            x,
42            y,
43            packed_alpha_idx_fill_gap: alpha_idx | fill_gap,
44        }
45    }
46
47    /// Return whether the strip is a sentinel strip.
48    pub fn is_sentinel(&self) -> bool {
49        self.x == u16::MAX
50    }
51
52    /// Return the y coordinate of the strip, in strip units.
53    pub fn strip_y(&self) -> u16 {
54        self.y / Tile::HEIGHT
55    }
56
57    /// Returns the alpha index.
58    #[inline(always)]
59    pub fn alpha_idx(&self) -> u32 {
60        self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK
61    }
62
63    /// Sets the alpha index.
64    ///
65    /// Note that the largest value that can be stored in the alpha index is `u32::MAX << 1`, as the
66    /// highest bit is reserved for `fill_gap`.
67    #[inline(always)]
68    pub fn set_alpha_idx(&mut self, alpha_idx: u32) {
69        // Ensure `alpha_idx` does not collide with the fill flag bit.
70        assert!(
71            alpha_idx & Self::FILL_GAP_MASK == 0,
72            "`alpha_idx` too large"
73        );
74        let fill_gap = self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK;
75        self.packed_alpha_idx_fill_gap = alpha_idx | fill_gap;
76    }
77
78    /// Returns whether the gap that lies between this strip and the previous in the same row should be filled.
79    #[inline(always)]
80    pub fn fill_gap(&self) -> bool {
81        (self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK) != 0
82    }
83
84    /// Sets whether the gap that lies between this strip and the previous in the same row should be filled.
85    #[inline(always)]
86    pub fn set_fill_gap(&mut self, fill: bool) {
87        let fill = u32::from(fill) << 31;
88        self.packed_alpha_idx_fill_gap =
89            (self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK) | fill;
90    }
91}
92
93/// Render the tiles stored in `tiles` into the strip and alpha buffer.
94pub fn render(
95    level: Level,
96    tiles: &Tiles,
97    strip_buf: &mut Vec<Strip>,
98    alpha_buf: &mut Vec<u8>,
99    fill_rule: Fill,
100    aliasing_threshold: Option<u8>,
101    lines: &[Line],
102) {
103    dispatch!(level, simd => render_impl(simd, tiles, strip_buf, alpha_buf, fill_rule, aliasing_threshold, lines));
104}
105
106fn render_impl<S: Simd>(
107    s: S,
108    tiles: &Tiles,
109    strip_buf: &mut Vec<Strip>,
110    alpha_buf: &mut Vec<u8>,
111    fill_rule: Fill,
112    aliasing_threshold: Option<u8>,
113    lines: &[Line],
114) {
115    if tiles.is_empty() {
116        return;
117    }
118
119    let should_fill = |winding: i32| match fill_rule {
120        Fill::NonZero => winding != 0,
121        Fill::EvenOdd => winding % 2 != 0,
122    };
123
124    // The accumulated tile winding delta. A line that crosses the top edge of a tile
125    // increments the delta if the line is directed upwards, and decrements it if goes
126    // downwards. Horizontal lines leave it unchanged.
127    let mut winding_delta: i32 = 0;
128
129    // The previous tile visited.
130    let mut prev_tile = *tiles.get(0);
131    // The accumulated (fractional) winding of the tile-sized location we're currently at.
132    // Note multiple tiles can be at the same location.
133    // Note that we are also implicitly assuming here that the tile height exactly fits into a
134    // SIMD vector (i.e. 128 bits).
135    let mut location_winding = [f32x4::splat(s, 0.0); Tile::WIDTH as usize];
136    // The accumulated (fractional) windings at this location's right edge. When we move to the
137    // next location, this is splatted to that location's starting winding.
138    let mut accumulated_winding = f32x4::splat(s, 0.0);
139
140    /// A special tile to keep the logic below simple.
141    const SENTINEL: Tile = Tile::new(u16::MAX, u16::MAX, 0, 0);
142
143    // The strip we're building.
144    let mut strip = Strip::new(
145        prev_tile.x * Tile::WIDTH,
146        prev_tile.y * Tile::HEIGHT,
147        alpha_buf.len() as u32,
148        false,
149    );
150
151    for (tile_idx, tile) in tiles.iter().copied().chain([SENTINEL]).enumerate() {
152        let line = lines[tile.line_idx() as usize];
153        let tile_left_x = f32::from(tile.x) * f32::from(Tile::WIDTH);
154        let tile_top_y = f32::from(tile.y) * f32::from(Tile::HEIGHT);
155        let p0_x = line.p0.x - tile_left_x;
156        let p0_y = line.p0.y - tile_top_y;
157        let p1_x = line.p1.x - tile_left_x;
158        let p1_y = line.p1.y - tile_top_y;
159
160        // Push out the winding as an alpha mask when we move to the next location (i.e., a tile
161        // without the same location).
162        if !prev_tile.same_loc(&tile) {
163            match fill_rule {
164                Fill::NonZero => {
165                    let p1 = f32x4::splat(s, 0.5);
166                    let p2 = f32x4::splat(s, 255.0);
167
168                    #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
169                    for x in 0..Tile::WIDTH as usize {
170                        let area = location_winding[x];
171                        let coverage = area.abs();
172                        let mulled = coverage.madd(p2, p1);
173                        // Note that we are not storing the location winding here but the actual
174                        // alpha value as f32, so we reuse the variable as a temporary storage.
175                        // Also note that we need the `min` here because the winding can be > 1
176                        // and thus the calculated alpha value need to be clamped to 255.
177                        location_winding[x] = mulled.min(p2);
178                    }
179                }
180                Fill::EvenOdd => {
181                    let p1 = f32x4::splat(s, 0.5);
182                    let p2 = f32x4::splat(s, -2.0);
183                    let p3 = f32x4::splat(s, 255.0);
184
185                    #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
186                    for x in 0..Tile::WIDTH as usize {
187                        let area = location_winding[x];
188                        let im1 = area.madd(p1, p1).floor();
189                        let coverage = p2.madd(im1, area).abs();
190                        let mulled = p3.madd(coverage, p1);
191                        // TODO: It is possible that, unlike for `NonZero`, we don't need the `min`
192                        // here.
193                        location_winding[x] = mulled.min(p3);
194                    }
195                }
196            };
197
198            let p1 = s.combine_f32x4(location_winding[0], location_winding[1]);
199            let p2 = s.combine_f32x4(location_winding[2], location_winding[3]);
200
201            let mut u8_vals = f32_to_u8(s.combine_f32x8(p1, p2));
202
203            if let Some(aliasing_threshold) = aliasing_threshold {
204                u8_vals = s.select_u8x16(
205                    u8_vals.simd_ge(u8x16::splat(s, aliasing_threshold)),
206                    u8x16::splat(s, 255),
207                    u8x16::splat(s, 0),
208                );
209            }
210
211            alpha_buf.extend_from_slice(u8_vals.as_slice());
212
213            #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
214            for x in 0..Tile::WIDTH as usize {
215                location_winding[x] = accumulated_winding;
216            }
217        }
218
219        // Push out the strip if we're moving to a next strip.
220        if !prev_tile.same_loc(&tile) && !prev_tile.prev_loc(&tile) {
221            debug_assert_eq!(
222                (prev_tile.x as u32 + 1) * Tile::WIDTH as u32 - strip.x as u32,
223                ((alpha_buf.len() - strip.alpha_idx() as usize) / usize::from(Tile::HEIGHT)) as u32,
224                "The number of columns written to the alpha buffer should equal the number of columns spanned by this strip."
225            );
226            strip_buf.push(strip);
227
228            let is_sentinel = tile_idx == tiles.len() as usize;
229            if !prev_tile.same_row(&tile) {
230                // Emit a final strip in the row if there is non-zero winding for the sparse fill,
231                // or unconditionally if we've reached the sentinel tile to end the path (the
232                // `alpha_idx` field is used for width calculations).
233                if winding_delta != 0 || is_sentinel {
234                    strip_buf.push(Strip::new(
235                        u16::MAX,
236                        prev_tile.y * Tile::HEIGHT,
237                        alpha_buf.len() as u32,
238                        should_fill(winding_delta),
239                    ));
240                }
241
242                winding_delta = 0;
243                accumulated_winding = f32x4::splat(s, 0.0);
244
245                #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
246                for x in 0..Tile::WIDTH as usize {
247                    location_winding[x] = accumulated_winding;
248                }
249            }
250
251            if is_sentinel {
252                break;
253            }
254
255            strip = Strip::new(
256                tile.x * Tile::WIDTH,
257                tile.y * Tile::HEIGHT,
258                alpha_buf.len() as u32,
259                should_fill(winding_delta),
260            );
261            // Note: this fill is mathematically not necessary. It provides a way to reduce
262            // accumulation of float rounding errors.
263            accumulated_winding = f32x4::splat(s, winding_delta as f32);
264        }
265        prev_tile = tile;
266
267        // TODO: horizontal geometry has no impact on winding. This branch will be removed when
268        // horizontal geometry is culled at the tile-generation stage.
269        if p0_y == p1_y {
270            continue;
271        }
272
273        // Lines moving upwards (in a y-down coordinate system) add to winding; lines moving
274        // downwards subtract from winding.
275        let sign = (p0_y - p1_y).signum();
276
277        // Calculate winding / pixel area coverage.
278        //
279        // Conceptually, horizontal rays are shot from left to right. Every time the ray crosses a
280        // line that is directed upwards (decreasing `y`), the winding is incremented. Every time
281        // the ray crosses a line moving downwards (increasing `y`), the winding is decremented.
282        // The fractional area coverage of a pixel is the integral of the winding within it.
283        //
284        // Practically, to calculate this, each pixel is considered individually, and we determine
285        // whether the line moves through this pixel. The line's y-delta within this pixel is
286        // accumulated and added to the area coverage of pixels to the right. Within the pixel
287        // itself, the area to the right of the line segment forms a trapezoid (or a triangle in
288        // the degenerate case). The area of this trapezoid is added to the pixel's area coverage.
289        //
290        // For example, consider the following pixel square, with a line indicated by asterisks
291        // starting inside the pixel and crossing its bottom edge. The area covered is the
292        // trapezoid on the bottom-right enclosed by the line and the pixel square. The area is
293        // positive if the line moves down, and negative otherwise.
294        //
295        //  __________________
296        //  |                |
297        //  |         *------|
298        //  |        *       |
299        //  |       *        |
300        //  |      *         |
301        //  |     *          |
302        //  |    *           |
303        //  |___*____________|
304        //     *
305        //    *
306
307        let (line_top_y, line_top_x, line_bottom_y, line_bottom_x) = if p0_y < p1_y {
308            (p0_y, p0_x, p1_y, p1_x)
309        } else {
310            (p1_y, p1_x, p0_y, p0_x)
311        };
312
313        let (line_left_x, line_left_y, line_right_x) = if p0_x < p1_x {
314            (p0_x, p0_y, p1_x)
315        } else {
316            (p1_x, p1_y, p0_x)
317        };
318
319        let y_slope = (line_bottom_y - line_top_y) / (line_bottom_x - line_top_x);
320        let x_slope = 1. / y_slope;
321
322        winding_delta += sign as i32 * i32::from(tile.winding());
323
324        // TODO: this should be removed when out-of-viewport tiles are culled at the
325        // tile-generation stage. That requires calculating and forwarding winding to strip
326        // generation.
327        if tile.x == 0 && line_left_x < 0. {
328            let (ymin, ymax) = if line.p0.x == line.p1.x {
329                (line_top_y, line_bottom_y)
330            } else {
331                let line_viewport_left_y = (line_top_y - line_top_x * y_slope)
332                    .max(line_top_y)
333                    .min(line_bottom_y);
334
335                (
336                    f32::min(line_left_y, line_viewport_left_y),
337                    f32::max(line_left_y, line_viewport_left_y),
338                )
339            };
340
341            let ymin: f32x4<_> = ymin.simd_into(s);
342            let ymax: f32x4<_> = ymax.simd_into(s);
343
344            let px_top_y: f32x4<_> = [0.0, 1.0, 2.0, 3.0].simd_into(s);
345            let px_bottom_y = 1.0 + px_top_y;
346            let ymin = px_top_y.max(ymin);
347            let ymax = px_bottom_y.min(ymax);
348            let h = (ymax - ymin).max(0.0);
349            accumulated_winding = h.madd(sign, accumulated_winding);
350            for x_idx in 0..Tile::WIDTH {
351                location_winding[x_idx as usize] = h.madd(sign, location_winding[x_idx as usize]);
352            }
353
354            if line_right_x < 0. {
355                // Early exit, as no part of the line is inside the tile.
356                continue;
357            }
358        }
359
360        let line_top_y = f32x4::splat(s, line_top_y);
361        let line_bottom_y = f32x4::splat(s, line_bottom_y);
362
363        let y_idx = f32x4::from_slice(s, &[0.0, 1.0, 2.0, 3.0]);
364        let px_top_y = y_idx;
365        let px_bottom_y = 1. + y_idx;
366
367        let ymin = line_top_y.max(px_top_y);
368        let ymax = line_bottom_y.min(px_bottom_y);
369
370        let mut acc = f32x4::splat(s, 0.0);
371
372        for x_idx in 0..Tile::WIDTH {
373            let x_idx_s = f32x4::splat(s, x_idx as f32);
374            let px_left_x = x_idx_s;
375            let px_right_x = 1.0 + x_idx_s;
376
377            // The y-coordinate of the intersections between the line and the pixel's left and
378            // right edges respectively.
379            //
380            // There is some subtlety going on here: `y_slope` will usually be finite, but will
381            // be `inf` for purely vertical lines (`p0_x == p1_x`).
382            //
383            // In the case of `inf`, the resulting slope calculation will be `-inf` or `inf`
384            // depending on whether the pixel edge is left or right of the line, respectively
385            // (from the viewport's coordinate system perspective). The `min` and `max`
386            // y-clamping logic generalizes nicely, as a pixel edge to the left of the line is
387            // clamped to `ymin`, and a pixel edge to the right is clamped to `ymax`.
388            //
389            // In the special case where a vertical line and pixel edge are at the exact same
390            // x-position (collinear), the line belongs to the pixel on whose _left_ edge it is
391            // situated. The resulting slope calculation for the edge the line is situated on
392            // will be NaN, as `0 * inf` results in NaN. This is true for both the left and
393            // right edge. In both cases, the call to `f32::max` will set this to `ymin`.
394            let line_px_left_y = (px_left_x - line_top_x)
395                .madd(y_slope, line_top_y)
396                .max_precise(ymin)
397                .min_precise(ymax);
398            let line_px_right_y = (px_right_x - line_top_x)
399                .madd(y_slope, line_top_y)
400                .max_precise(ymin)
401                .min_precise(ymax);
402
403            // `x_slope` is always finite, as horizontal geometry is elided.
404            let line_px_left_yx =
405                (line_px_left_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
406            let line_px_right_yx =
407                (line_px_right_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
408            let h = (line_px_right_y - line_px_left_y).abs();
409
410            // The trapezoidal area enclosed between the line and the right edge of the pixel
411            // square.
412            let area = 0.5 * h * (2. * px_right_x - line_px_right_yx - line_px_left_yx);
413            location_winding[x_idx as usize] += area.madd(sign, acc);
414            acc = h.madd(sign, acc);
415        }
416
417        accumulated_winding += acc;
418    }
419}