vello_common/strip.rs
1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Rendering strips.
5
6use crate::flatten::Line;
7use crate::peniko::Fill;
8use crate::tile::{Tile, Tiles};
9use crate::util::f32_to_u8;
10use alloc::vec::Vec;
11use fearless_simd::*;
12
13/// A strip.
14#[derive(Debug, Clone, Copy)]
15pub struct Strip {
16 /// The x coordinate of the strip, in user coordinates.
17 pub x: u16,
18 /// The y coordinate of the strip, in user coordinates.
19 pub y: u16,
20 /// Packed alpha index and fill gap flag.
21 ///
22 /// Bit layout (u32):
23 /// - bit 31: `fill_gap` (See `Strip::fill_gap()`).
24 /// - bits 0..=30: `alpha_idx` (See `Strip::alpha_idx()`).
25 packed_alpha_idx_fill_gap: u32,
26}
27
28impl Strip {
29 /// The bit mask for `fill_gap` packed into `packed_alpha_idx_fill_gap`.
30 const FILL_GAP_MASK: u32 = 1 << 31;
31
32 /// Creates a new strip.
33 pub fn new(x: u16, y: u16, alpha_idx: u32, fill_gap: bool) -> Self {
34 // Ensure `alpha_idx` does not collide with the fill flag bit.
35 assert!(
36 alpha_idx & Self::FILL_GAP_MASK == 0,
37 "`alpha_idx` too large"
38 );
39 let fill_gap = u32::from(fill_gap) << 31;
40 Self {
41 x,
42 y,
43 packed_alpha_idx_fill_gap: alpha_idx | fill_gap,
44 }
45 }
46
47 /// Returns the y coordinate of the strip, in strip units.
48 pub fn strip_y(&self) -> u16 {
49 self.y / Tile::HEIGHT
50 }
51
52 /// Returns the alpha index.
53 #[inline(always)]
54 pub fn alpha_idx(&self) -> u32 {
55 self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK
56 }
57
58 /// Sets the alpha index.
59 ///
60 /// Note that the largest value that can be stored in the alpha index is `u32::MAX << 1`, as the
61 /// highest bit is reserved for `fill_gap`.
62 #[inline(always)]
63 pub fn set_alpha_idx(&mut self, alpha_idx: u32) {
64 // Ensure `alpha_idx` does not collide with the fill flag bit.
65 assert!(
66 alpha_idx & Self::FILL_GAP_MASK == 0,
67 "`alpha_idx` too large"
68 );
69 let fill_gap = self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK;
70 self.packed_alpha_idx_fill_gap = alpha_idx | fill_gap;
71 }
72
73 /// Returns whether the gap that lies between this strip and the previous in the same row should be filled.
74 #[inline(always)]
75 pub fn fill_gap(&self) -> bool {
76 (self.packed_alpha_idx_fill_gap & Self::FILL_GAP_MASK) != 0
77 }
78
79 /// Sets whether the gap that lies between this strip and the previous in the same row should be filled.
80 #[inline(always)]
81 pub fn set_fill_gap(&mut self, fill: bool) {
82 let fill = u32::from(fill) << 31;
83 self.packed_alpha_idx_fill_gap =
84 (self.packed_alpha_idx_fill_gap & !Self::FILL_GAP_MASK) | fill;
85 }
86}
87
88/// Render the tiles stored in `tiles` into the strip and alpha buffer.
89pub fn render(
90 level: Level,
91 tiles: &Tiles,
92 strip_buf: &mut Vec<Strip>,
93 alpha_buf: &mut Vec<u8>,
94 fill_rule: Fill,
95 aliasing_threshold: Option<u8>,
96 lines: &[Line],
97) {
98 dispatch!(level, simd => render_impl(simd, tiles, strip_buf, alpha_buf, fill_rule, aliasing_threshold, lines));
99}
100
101fn render_impl<S: Simd>(
102 s: S,
103 tiles: &Tiles,
104 strip_buf: &mut Vec<Strip>,
105 alpha_buf: &mut Vec<u8>,
106 fill_rule: Fill,
107 aliasing_threshold: Option<u8>,
108 lines: &[Line],
109) {
110 if tiles.is_empty() {
111 return;
112 }
113
114 let should_fill = |winding: i32| match fill_rule {
115 Fill::NonZero => winding != 0,
116 Fill::EvenOdd => winding % 2 != 0,
117 };
118
119 // The accumulated tile winding delta. A line that crosses the top edge of a tile
120 // increments the delta if the line is directed upwards, and decrements it if goes
121 // downwards. Horizontal lines leave it unchanged.
122 let mut winding_delta: i32 = 0;
123
124 // The previous tile visited.
125 let mut prev_tile = *tiles.get(0);
126 // The accumulated (fractional) winding of the tile-sized location we're currently at.
127 // Note multiple tiles can be at the same location.
128 // Note that we are also implicitly assuming here that the tile height exactly fits into a
129 // SIMD vector (i.e. 128 bits).
130 let mut location_winding = [f32x4::splat(s, 0.0); Tile::WIDTH as usize];
131 // The accumulated (fractional) windings at this location's right edge. When we move to the
132 // next location, this is splatted to that location's starting winding.
133 let mut accumulated_winding = f32x4::splat(s, 0.0);
134
135 /// A special tile to keep the logic below simple.
136 const SENTINEL: Tile = Tile::new(u16::MAX, u16::MAX, 0, false);
137
138 // The strip we're building.
139 let mut strip = Strip::new(
140 prev_tile.x * Tile::WIDTH,
141 prev_tile.y * Tile::HEIGHT,
142 alpha_buf.len() as u32,
143 false,
144 );
145
146 for (tile_idx, tile) in tiles.iter().copied().chain([SENTINEL]).enumerate() {
147 let line = lines[tile.line_idx() as usize];
148 let tile_left_x = f32::from(tile.x) * f32::from(Tile::WIDTH);
149 let tile_top_y = f32::from(tile.y) * f32::from(Tile::HEIGHT);
150 let p0_x = line.p0.x - tile_left_x;
151 let p0_y = line.p0.y - tile_top_y;
152 let p1_x = line.p1.x - tile_left_x;
153 let p1_y = line.p1.y - tile_top_y;
154
155 // Push out the winding as an alpha mask when we move to the next location (i.e., a tile
156 // without the same location).
157 if !prev_tile.same_loc(&tile) {
158 match fill_rule {
159 Fill::NonZero => {
160 let p1 = f32x4::splat(s, 0.5);
161 let p2 = f32x4::splat(s, 255.0);
162
163 #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
164 for x in 0..Tile::WIDTH as usize {
165 let area = location_winding[x];
166 let coverage = area.abs();
167 let mulled = coverage.madd(p2, p1);
168 // Note that we are not storing the location winding here but the actual
169 // alpha value as f32, so we reuse the variable as a temporary storage.
170 // Also note that we need the `min` here because the winding can be > 1
171 // and thus the calculated alpha value need to be clamped to 255.
172 location_winding[x] = mulled.min(p2);
173 }
174 }
175 Fill::EvenOdd => {
176 let p1 = f32x4::splat(s, 0.5);
177 let p2 = f32x4::splat(s, -2.0);
178 let p3 = f32x4::splat(s, 255.0);
179
180 #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
181 for x in 0..Tile::WIDTH as usize {
182 let area = location_winding[x];
183 let im1 = area.madd(p1, p1).floor();
184 let coverage = p2.madd(im1, area).abs();
185 let mulled = p3.madd(coverage, p1);
186 // TODO: It is possible that, unlike for `NonZero`, we don't need the `min`
187 // here.
188 location_winding[x] = mulled.min(p3);
189 }
190 }
191 };
192
193 let p1 = s.combine_f32x4(location_winding[0], location_winding[1]);
194 let p2 = s.combine_f32x4(location_winding[2], location_winding[3]);
195
196 let mut u8_vals = f32_to_u8(s.combine_f32x8(p1, p2));
197
198 if let Some(aliasing_threshold) = aliasing_threshold {
199 u8_vals = s.select_u8x16(
200 u8_vals.simd_ge(u8x16::splat(s, aliasing_threshold)),
201 u8x16::splat(s, 255),
202 u8x16::splat(s, 0),
203 );
204 }
205
206 alpha_buf.extend_from_slice(&u8_vals.val);
207
208 #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
209 for x in 0..Tile::WIDTH as usize {
210 location_winding[x] = accumulated_winding;
211 }
212 }
213
214 // Push out the strip if we're moving to a next strip.
215 if !prev_tile.same_loc(&tile) && !prev_tile.prev_loc(&tile) {
216 debug_assert_eq!(
217 (prev_tile.x as u32 + 1) * Tile::WIDTH as u32 - strip.x as u32,
218 ((alpha_buf.len() - strip.alpha_idx() as usize) / usize::from(Tile::HEIGHT)) as u32,
219 "The number of columns written to the alpha buffer should equal the number of columns spanned by this strip."
220 );
221 strip_buf.push(strip);
222
223 let is_sentinel = tile_idx == tiles.len() as usize;
224 if !prev_tile.same_row(&tile) {
225 // Emit a final strip in the row if there is non-zero winding for the sparse fill,
226 // or unconditionally if we've reached the sentinel tile to end the path (the
227 // `alpha_idx` field is used for width calculations).
228 if winding_delta != 0 || is_sentinel {
229 strip_buf.push(Strip::new(
230 u16::MAX,
231 prev_tile.y * Tile::HEIGHT,
232 alpha_buf.len() as u32,
233 should_fill(winding_delta),
234 ));
235 }
236
237 winding_delta = 0;
238 accumulated_winding = f32x4::splat(s, 0.0);
239
240 #[expect(clippy::needless_range_loop, reason = "dimension clarity")]
241 for x in 0..Tile::WIDTH as usize {
242 location_winding[x] = accumulated_winding;
243 }
244 }
245
246 if is_sentinel {
247 break;
248 }
249
250 strip = Strip::new(
251 tile.x * Tile::WIDTH,
252 tile.y * Tile::HEIGHT,
253 alpha_buf.len() as u32,
254 should_fill(winding_delta),
255 );
256 // Note: this fill is mathematically not necessary. It provides a way to reduce
257 // accumulation of float rounding errors.
258 accumulated_winding = f32x4::splat(s, winding_delta as f32);
259 }
260 prev_tile = tile;
261
262 // TODO: horizontal geometry has no impact on winding. This branch will be removed when
263 // horizontal geometry is culled at the tile-generation stage.
264 if p0_y == p1_y {
265 continue;
266 }
267
268 // Lines moving upwards (in a y-down coordinate system) add to winding; lines moving
269 // downwards subtract from winding.
270 let sign = (p0_y - p1_y).signum();
271
272 // Calculate winding / pixel area coverage.
273 //
274 // Conceptually, horizontal rays are shot from left to right. Every time the ray crosses a
275 // line that is directed upwards (decreasing `y`), the winding is incremented. Every time
276 // the ray crosses a line moving downwards (increasing `y`), the winding is decremented.
277 // The fractional area coverage of a pixel is the integral of the winding within it.
278 //
279 // Practically, to calculate this, each pixel is considered individually, and we determine
280 // whether the line moves through this pixel. The line's y-delta within this pixel is
281 // accumulated and added to the area coverage of pixels to the right. Within the pixel
282 // itself, the area to the right of the line segment forms a trapezoid (or a triangle in
283 // the degenerate case). The area of this trapezoid is added to the pixel's area coverage.
284 //
285 // For example, consider the following pixel square, with a line indicated by asterisks
286 // starting inside the pixel and crossing its bottom edge. The area covered is the
287 // trapezoid on the bottom-right enclosed by the line and the pixel square. The area is
288 // positive if the line moves down, and negative otherwise.
289 //
290 // __________________
291 // | |
292 // | *------|
293 // | * |
294 // | * |
295 // | * |
296 // | * |
297 // | * |
298 // |___*____________|
299 // *
300 // *
301
302 let (line_top_y, line_top_x, line_bottom_y, line_bottom_x) = if p0_y < p1_y {
303 (p0_y, p0_x, p1_y, p1_x)
304 } else {
305 (p1_y, p1_x, p0_y, p0_x)
306 };
307
308 let (line_left_x, line_left_y, line_right_x) = if p0_x < p1_x {
309 (p0_x, p0_y, p1_x)
310 } else {
311 (p1_x, p1_y, p0_x)
312 };
313
314 let y_slope = (line_bottom_y - line_top_y) / (line_bottom_x - line_top_x);
315 let x_slope = 1. / y_slope;
316
317 winding_delta += sign as i32 * i32::from(tile.winding());
318
319 // TODO: this should be removed when out-of-viewport tiles are culled at the
320 // tile-generation stage. That requires calculating and forwarding winding to strip
321 // generation.
322 if tile.x == 0 && line_left_x < 0. {
323 let (ymin, ymax) = if line.p0.x == line.p1.x {
324 (line_top_y, line_bottom_y)
325 } else {
326 let line_viewport_left_y = (line_top_y - line_top_x * y_slope)
327 .max(line_top_y)
328 .min(line_bottom_y);
329
330 (
331 f32::min(line_left_y, line_viewport_left_y),
332 f32::max(line_left_y, line_viewport_left_y),
333 )
334 };
335
336 let ymin: f32x4<_> = ymin.simd_into(s);
337 let ymax: f32x4<_> = ymax.simd_into(s);
338
339 let px_top_y: f32x4<_> = [0.0, 1.0, 2.0, 3.0].simd_into(s);
340 let px_bottom_y = 1.0 + px_top_y;
341 let ymin = px_top_y.max(ymin);
342 let ymax = px_bottom_y.min(ymax);
343 let h = (ymax - ymin).max(0.0);
344 accumulated_winding = h.madd(sign, accumulated_winding);
345 for x_idx in 0..Tile::WIDTH {
346 location_winding[x_idx as usize] = h.madd(sign, location_winding[x_idx as usize]);
347 }
348
349 if line_right_x < 0. {
350 // Early exit, as no part of the line is inside the tile.
351 continue;
352 }
353 }
354
355 let line_top_y = f32x4::splat(s, line_top_y);
356 let line_bottom_y = f32x4::splat(s, line_bottom_y);
357
358 let y_idx = f32x4::from_slice(s, &[0.0, 1.0, 2.0, 3.0]);
359 let px_top_y = y_idx;
360 let px_bottom_y = 1. + y_idx;
361
362 let ymin = line_top_y.max(px_top_y);
363 let ymax = line_bottom_y.min(px_bottom_y);
364
365 let mut acc = f32x4::splat(s, 0.0);
366
367 for x_idx in 0..Tile::WIDTH {
368 let x_idx_s = f32x4::splat(s, x_idx as f32);
369 let px_left_x = x_idx_s;
370 let px_right_x = 1.0 + x_idx_s;
371
372 // The y-coordinate of the intersections between the line and the pixel's left and
373 // right edges respectively.
374 //
375 // There is some subtlety going on here: `y_slope` will usually be finite, but will
376 // be `inf` for purely vertical lines (`p0_x == p1_x`).
377 //
378 // In the case of `inf`, the resulting slope calculation will be `-inf` or `inf`
379 // depending on whether the pixel edge is left or right of the line, respectively
380 // (from the viewport's coordinate system perspective). The `min` and `max`
381 // y-clamping logic generalizes nicely, as a pixel edge to the left of the line is
382 // clamped to `ymin`, and a pixel edge to the right is clamped to `ymax`.
383 //
384 // In the special case where a vertical line and pixel edge are at the exact same
385 // x-position (collinear), the line belongs to the pixel on whose _left_ edge it is
386 // situated. The resulting slope calculation for the edge the line is situated on
387 // will be NaN, as `0 * inf` results in NaN. This is true for both the left and
388 // right edge. In both cases, the call to `f32::max` will set this to `ymin`.
389 let line_px_left_y = (px_left_x - line_top_x)
390 .madd(y_slope, line_top_y)
391 .max_precise(ymin)
392 .min_precise(ymax);
393 let line_px_right_y = (px_right_x - line_top_x)
394 .madd(y_slope, line_top_y)
395 .max_precise(ymin)
396 .min_precise(ymax);
397
398 // `x_slope` is always finite, as horizontal geometry is elided.
399 let line_px_left_yx =
400 (line_px_left_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
401 let line_px_right_yx =
402 (line_px_right_y - line_top_y).madd(x_slope, f32x4::splat(s, line_top_x));
403 let h = (line_px_right_y - line_px_left_y).abs();
404
405 // The trapezoidal area enclosed between the line and the right edge of the pixel
406 // square.
407 let area = 0.5 * h * (2. * px_right_x - line_px_right_yx - line_px_left_yx);
408 location_winding[x_idx as usize] += area.madd(sign, acc);
409 acc = h.madd(sign, acc);
410 }
411
412 accumulated_winding += acc;
413 }
414}