vello_cpu/filter/gaussian_blur.rs
1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Gaussian blur filter implementation using multi-scale separable convolution.
5//!
6//! This implementation uses a multi-scale approach for efficient blurring:
7//! - **Small blurs** (σ ≤ 2): Direct separable convolution at full resolution
8//! - **Large blurs** (σ > 2): Iterative downsample → blur → upsample pyramid
9//!
10//! The algorithm automatically determines the optimal number of decimation levels
11//! using variance analysis. Each 2× decimation applies a \[1,3,3,1\]/8 binomial filter
12//! (adding variance = 3.0), then downsamples, reducing the remaining blur work needed.
13//! This exploits the variance additivity property: `σ²_total = σ²_downsample + σ²_blur`.
14//!
15//! **Variance Addition Reference**: The convolution of two Gaussians with variances σ₁²
16//! and σ₂² produces a Gaussian with variance σ₁² + σ₂². This fundamental property comes
17//! from probability theory and applies to Gaussian convolution in image processing. See:
18//! - Torralba & Freeman, "Foundations of Computer Vision" (MIT Press), Section 2.2:
19//! <https://visionbook.mit.edu/blurring_2.html#properties-of-the-continuous-gaussian>
20
21use super::FilterEffect;
22use crate::layer_manager::LayerManager;
23use alloc::vec;
24use core::f32::consts::E;
25use vello_common::filter_effects::EdgeMode;
26use vello_common::peniko::color::PremulRgba8;
27#[cfg(not(feature = "std"))]
28use vello_common::peniko::kurbo::common::FloatFuncs as _;
29use vello_common::pixmap::Pixmap;
30
31/// Maximum size of the Gaussian kernel (must be odd).
32/// The multi-scale decimation algorithm guarantees that kernel size never exceeds this value.
33/// Decimation stops when remaining variance ≤ 4.0 (σ ≤ 2.0), which produces kernels of size
34/// at most 13 (radius = ceil(3σ) = 6, size = 1 + 2×6 = 13).
35pub(crate) const MAX_KERNEL_SIZE: usize = 13;
36
37pub(crate) struct GaussianBlur {
38 std_deviation: f32,
39 /// Number of 2× decimation levels to use (0 means no decimation, direct convolution).
40 n_decimations: usize,
41 /// Pre-computed Gaussian kernel weights for the reduced blur.
42 /// Only the first `kernel_size` elements are valid.
43 kernel: [f32; MAX_KERNEL_SIZE],
44 /// Actual length of the kernel (rest is padding up to `MAX_KERNEL_SIZE`).
45 kernel_size: usize,
46 /// Edge mode for handling out-of-bounds sampling.
47 edge_mode: EdgeMode,
48}
49
50impl GaussianBlur {
51 /// Create a new Gaussian blur filter with the specified standard deviation.
52 ///
53 /// This precomputes the decimation plan, kernel, and radius for optimal performance.
54 pub(crate) fn new(std_deviation: f32, edge_mode: EdgeMode) -> Self {
55 let (n_decimations, kernel, kernel_size) = plan_decimated_blur(std_deviation);
56
57 Self {
58 std_deviation,
59 edge_mode,
60 n_decimations,
61 kernel,
62 kernel_size,
63 }
64 }
65}
66
67impl FilterEffect for GaussianBlur {
68 fn execute_lowp(&self, pixmap: &mut Pixmap, layer_manager: &mut LayerManager) {
69 // No blur if std_deviation is zero or negative
70 if self.std_deviation <= 0.0 {
71 return;
72 }
73
74 let scratch = layer_manager.get_scratch_buffer(pixmap.width(), pixmap.height());
75 apply_blur(
76 pixmap,
77 scratch,
78 self.n_decimations,
79 &self.kernel[..self.kernel_size],
80 self.edge_mode,
81 );
82 }
83
84 fn execute_highp(&self, pixmap: &mut Pixmap, layer_manager: &mut LayerManager) {
85 // TODO: Currently only lowp is implemented and used for highp as well.
86 // This needs to be updated to use proper high-precision arithmetic.
87 Self::execute_lowp(self, pixmap, layer_manager);
88 }
89}
90
91/// Compute the blur execution plan based on standard deviation.
92///
93/// Returns (`n_decimations`, `kernel`, `kernel_size`):
94/// - `n_decimations`: Number of 2× downsampling steps to perform (per axis)
95/// - `kernel`: Pre-computed Gaussian kernel weights (fixed-size array)
96/// - `kernel_size`: Actual length of the kernel (rest is zero-padded)
97pub(crate) fn plan_decimated_blur(std_deviation: f32) -> (usize, [f32; MAX_KERNEL_SIZE], usize) {
98 if std_deviation <= 0.0 {
99 // Invalid standard deviation, return identity kernel (no blur)
100 let mut kernel = [0.0; MAX_KERNEL_SIZE];
101 kernel[0] = 1.0;
102 return (0, kernel, 1);
103 }
104
105 // Compute decimation plan using variance analysis.
106 // Variance (σ²) has the additive property: applying two blurs sequentially
107 // adds their variances together. We use this to decompose the blur.
108 //
109 // Mathematical Foundation: From probability theory, convolving two Gaussians
110 // G(σ₁) ⊗ G(σ₂) = G(√(σ₁² + σ₂²)). This means variance is additive: σ²_total = σ²_1 + σ²_2.
111 // Rearranging: σ²_2 = σ²_total - σ²_1, allowing us to decompose the target blur.
112 let variance = std_deviation * std_deviation;
113 let mut n_decimations = 0;
114 let mut remaining_variance = variance;
115
116 // Each decimation step:
117 // 1. Applies [1,3,3,1] blur which adds variance = 0.75 per axis
118 // 2. Downsamples by 2× per axis, adding variance = 0.75 × 4 = 3.0 total
119 // 3. The 2× downsampling scales the remaining variance by 0.25 (= 1/2²)
120 // Stop when remaining variance ≤ 4.0 (σ ≈ 2), as further decimation isn't beneficial.
121 while remaining_variance > 4.0 {
122 remaining_variance = (remaining_variance - 3.0) * 0.25;
123 n_decimations += 1;
124 }
125 // Compute the reduced standard deviation to apply at the decimated resolution
126 let remaining_sigma = remaining_variance.sqrt();
127 // Compute Gaussian kernel for the reduced blur
128 let (kernel, kernel_size) = compute_gaussian_kernel(remaining_sigma);
129
130 (n_decimations, kernel, kernel_size)
131}
132
133/// Compute 1D Gaussian kernel weights for separable convolution.
134///
135/// Returns (`kernel_weights`, `kernel_size`) where `kernel_size = 2×radius + 1`.
136/// The kernel is stored in a fixed-size array to avoid heap allocation.
137/// Uses the standard Gaussian formula: G(x) = exp(-x² / (2σ²)), normalized to sum to 1.
138pub(crate) fn compute_gaussian_kernel(std_deviation: f32) -> ([f32; MAX_KERNEL_SIZE], usize) {
139 // Use radius = 3σ to capture 99.7% of the Gaussian distribution.
140 // Beyond ±3σ, the Gaussian values are negligible (<0.3%).
141 let radius = (3.0 * std_deviation).ceil() as usize;
142 let kernel_size = (1 + radius * 2).min(MAX_KERNEL_SIZE);
143
144 let mut kernel = [0.0; MAX_KERNEL_SIZE];
145 // Compute Gaussian weights using the formula: G(x) = exp(-x² / (2σ²))
146 // This creates a symmetric bell curve centered at the middle of the kernel.
147 let gaussian_denominator = 2.0 * std_deviation * std_deviation;
148 let mut sum = 0.0;
149 let kernel_center = (kernel_size / 2) as f32;
150 for (i, weight) in kernel.iter_mut().enumerate().take(kernel_size) {
151 // Compute distance from center (0 at center, increases outward)
152 let x = (i as f32) - kernel_center;
153 // Apply Gaussian formula: weight decreases exponentially with squared distance
154 *weight = E.powf(-x * x / gaussian_denominator);
155 sum += *weight;
156 }
157
158 // Normalize weights to sum to 1.0, ensuring the blur doesn't change overall brightness.
159 // Without normalization, blurring a uniform gray area could make it brighter/darker.
160 let scale = 1.0 / sum;
161 for weight in kernel.iter_mut().take(kernel_size) {
162 *weight *= scale;
163 }
164
165 (kernel, kernel_size)
166}
167
168/// Apply Gaussian blur using multi-scale decimation and upsampling.
169///
170/// Uses a precomputed decimation plan and kernel for optimal performance.
171/// Operates in-place using a single pixmap buffer with logical dimension tracking
172/// to minimize memory allocations. For `n_decimations=0`, applies direct convolution.
173///
174/// The `scratch` buffer is used for separable convolution and must be at least as
175/// large as the source pixmap.
176pub(crate) fn apply_blur(
177 pixmap: &mut Pixmap,
178 scratch: &mut Pixmap,
179 n_decimations: usize,
180 kernel: &[f32],
181 edge_mode: EdgeMode,
182) {
183 let radius = kernel.len() / 2;
184 let width = pixmap.width();
185 let height = pixmap.height();
186
187 // Small blur: apply direct convolution at full resolution
188 if n_decimations == 0 {
189 convolve(pixmap, scratch, width, height, kernel, radius, edge_mode);
190 return;
191 }
192
193 // Track logical dimensions through decimation (physical buffer stays the same size)
194 let mut src_width = width;
195 let mut src_height = height;
196 let mut dimensions_stack = vec![(width, height)];
197
198 // Downsample n times (each step reduces resolution by 2×)
199 for _ in 0..n_decimations {
200 (src_width, src_height) = downscale(pixmap, src_width, src_height, edge_mode);
201 dimensions_stack.push((src_width, src_height));
202 }
203
204 // Apply the reduced blur at the coarsest resolution
205 convolve(
206 pixmap, scratch, src_width, src_height, kernel, radius, edge_mode,
207 );
208
209 // Upsample back to original resolution (each step doubles resolution by 2×)
210 for _ in 0..n_decimations {
211 dimensions_stack.pop();
212 if let Some(&(target_width, target_height)) = dimensions_stack.last() {
213 (src_width, src_height) = upscale(pixmap, src_width, src_height, edge_mode);
214 // Clamp because upscale can exceed target on odd dimensions (e.g., 5→3→6 > 5)
215 src_width = src_width.min(target_width);
216 src_height = src_height.min(target_height);
217 }
218 }
219
220 debug_assert_eq!(
221 (src_width, src_height),
222 (width, height),
223 "Final dimensions should match original"
224 );
225}
226
227/// Apply separable Gaussian convolution with logical dimensions.
228///
229/// Performs horizontal blur followed by vertical blur. Works with a logical view
230/// of the pixmap, using only the top-left region defined by width × height.
231/// The `temp` buffer is provided by the caller to avoid allocations.
232pub(crate) fn convolve(
233 src: &mut Pixmap,
234 scratch: &mut Pixmap,
235 width: u16,
236 height: u16,
237 kernel: &[f32],
238 radius: usize,
239 edge_mode: EdgeMode,
240) {
241 convolve_x(src, scratch, width, height, kernel, radius, edge_mode);
242 convolve_y(scratch, src, width, height, kernel, radius, edge_mode);
243}
244
245/// Apply horizontal blur pass (1D convolution along x-axis).
246///
247/// For each output pixel, computes a weighted sum of horizontally neighboring pixels
248/// using the Gaussian kernel. Handles edge cases according to the specified edge mode.
249/// Writes results to a destination buffer to avoid overwriting source data.
250pub(crate) fn convolve_x(
251 src: &Pixmap,
252 dst: &mut Pixmap,
253 src_width: u16,
254 src_height: u16,
255 kernel: &[f32],
256 radius: usize,
257 edge_mode: EdgeMode,
258) {
259 for y in 0..src_height {
260 for x in 0..src_width {
261 let mut rgba = [0.0_f32; 4];
262
263 // Sum contributions from all kernel positions: output = Σ(weight[j] × pixel[x+j-radius])
264 for (j, &k) in kernel.iter().enumerate() {
265 let src_x = x as i32 + j as i32 - radius as i32;
266 let p = sample_x(src, src_x, y, src_width, edge_mode);
267
268 rgba[0] += p.r as f32 * k;
269 rgba[1] += p.g as f32 * k;
270 rgba[2] += p.b as f32 * k;
271 rgba[3] += p.a as f32 * k;
272 }
273
274 // Convert back to u8 with rounding
275 dst.set_pixel(
276 x,
277 y,
278 PremulRgba8 {
279 r: rgba[0].round() as u8,
280 g: rgba[1].round() as u8,
281 b: rgba[2].round() as u8,
282 a: rgba[3].round() as u8,
283 },
284 );
285 }
286 }
287}
288
289/// Apply vertical blur pass (1D convolution along y-axis).
290///
291/// For each output pixel, computes a weighted sum of vertically neighboring pixels
292/// using the Gaussian kernel. Handles edge cases according to the specified edge mode.
293/// Writes results to a destination buffer to avoid overwriting source data.
294pub(crate) fn convolve_y(
295 src: &Pixmap,
296 dst: &mut Pixmap,
297 src_width: u16,
298 src_height: u16,
299 kernel: &[f32],
300 radius: usize,
301 edge_mode: EdgeMode,
302) {
303 for y in 0..src_height {
304 for x in 0..src_width {
305 let mut rgba = [0.0_f32; 4];
306
307 // Sum contributions from all kernel positions: output = Σ(weight[j] × pixel[y+j-radius])
308 for (j, &k) in kernel.iter().enumerate() {
309 let src_y = y as i32 + j as i32 - radius as i32;
310 let p = sample_y(src, x, src_y, src_height, edge_mode);
311
312 rgba[0] += p.r as f32 * k;
313 rgba[1] += p.g as f32 * k;
314 rgba[2] += p.b as f32 * k;
315 rgba[3] += p.a as f32 * k;
316 }
317
318 // Convert back to u8 with rounding
319 dst.set_pixel(
320 x,
321 y,
322 PremulRgba8 {
323 r: rgba[0].round() as u8,
324 g: rgba[1].round() as u8,
325 b: rgba[2].round() as u8,
326 a: rgba[3].round() as u8,
327 },
328 );
329 }
330 }
331}
332
333/// Downsample image by 2x using separable \[1,3,3,1\]/8 binomial filter.
334///
335/// Performs horizontal and vertical decimation in sequence. Returns the new
336/// logical dimensions (ceil(width/2), ceil(height/2)).
337pub(crate) fn downscale(
338 src: &mut Pixmap,
339 src_width: u16,
340 src_height: u16,
341 edge_mode: EdgeMode,
342) -> (u16, u16) {
343 let dst_width = src_width.div_ceil(2);
344 let dst_height = src_height.div_ceil(2);
345 downscale_x(src, src_width, src_height, dst_width, edge_mode);
346 downscale_y(src, src_width, src_height, dst_height, edge_mode);
347 (dst_width, dst_height)
348}
349
350/// Horizontal decimation pass using \[1,3,3,1\]/8 filter.
351///
352/// Reduces width by 2x while applying a binomial blur kernel. The \[1,3,3,1\] weights
353/// approximate a Gaussian and contribute variance=0.75 before the 2x downsampling.
354fn downscale_x(
355 src: &mut Pixmap,
356 src_width: u16,
357 src_height: u16,
358 dst_width: u16,
359 edge_mode: EdgeMode,
360) {
361 for y in 0..src_height {
362 // Sliding window: maintains 2 previous pixels (x0, x1) to form 4-tap filter
363 // with current pixels (x2, x3). Start with sample at -1 for implicit left padding.
364 let mut p0 = sample_x(src, -1, y, src_width, edge_mode);
365 let mut p1 = sample_x(src, 0, y, src_width, edge_mode);
366
367 for x in 0..dst_width {
368 // Sample 4 horizontally adjacent pixels for [1,3,3,1] kernel
369 // Pattern: [x*2-1, x*2, x*2+1, x*2+2]
370 let src_x = (x * 2) as i32;
371 let p2 = sample_x(src, src_x + 1, y, src_width, edge_mode);
372 let p3 = sample_x(src, src_x + 2, y, src_width, edge_mode);
373
374 // Apply [1,3,3,1]/8 weights → output = (p0 + 3×p1 + 3×p2 + p3) / 8
375 src.set_pixel(x, y, decimate_weighted(p0, p1, p2, p3));
376
377 // Advance window: previous p2,p3 become next p0,p1
378 p0 = p2;
379 p1 = p3;
380 }
381 }
382}
383
384/// Vertical decimation pass using \[1,3,3,1\]/8 filter.
385///
386/// Reduces logical height by 2x while applying a binomial blur kernel.
387/// Operates in-place by writing to the beginning of the same buffer.
388fn downscale_y(
389 src: &mut Pixmap,
390 src_width: u16,
391 src_height: u16,
392 dst_height: u16,
393 edge_mode: EdgeMode,
394) {
395 for x in 0..src_width {
396 // Sliding window: maintains 2 previous pixels (y0, y1) to form 4-tap filter
397 // with current pixels (y2, y3). Start with sample at -1 for implicit top padding.
398 let mut p0 = sample_y(src, x, -1, src_height, edge_mode);
399 let mut p1 = sample_y(src, x, 0, src_height, edge_mode);
400
401 for y in 0..dst_height {
402 // Sample 4 vertically adjacent pixels for [1,3,3,1] kernel
403 // Pattern: [y*2-1, y*2, y*2+1, y*2+2]
404 let src_y = (y * 2) as i32;
405 let p2 = sample_y(src, x, src_y + 1, src_height, edge_mode);
406 let p3 = sample_y(src, x, src_y + 2, src_height, edge_mode);
407
408 // Apply [1,3,3,1]/8 weights → output = (p0 + 3×p1 + 3×p2 + p3) / 8
409 src.set_pixel(x, y, decimate_weighted(p0, p1, p2, p3));
410
411 // Advance window: previous p2,p3 become next p0,p1
412 p0 = p2;
413 p1 = p3;
414 }
415 }
416}
417
418/// Upsample a pixmap by 2x using linear interpolation with [0.75, 0.25] weights.
419///
420/// Uses separable passes: horizontal doubling followed by vertical doubling.
421///
422/// ## Phase Alignment Theory
423///
424/// The downsampling \[1,3,3,1\]/8 filter creates a half-pixel offset. Its center of mass
425/// is at position `2k + 0.5`, not `2k`. This means each downsampled pixel at discrete
426/// position `k` represents a sample at continuous position `2k + 0.5`.
427///
428/// When upsampling, we reconstruct pixels at positions `2k` and `2k+1` from downsampled
429/// pixels whose centers are at `..., 2k-1.5, 2k+0.5, 2k+2.5, ...`
430///
431/// **Interpolation weights** are derived from linear interpolation based on distances:
432/// - Position `2k`: distance 0.5 from center at `2k+0.5`, distance 1.5 from center at `2k-1.5`
433/// → weights: 0.75×pixel\[k\] + 0.25×pixel\[k-1\]
434/// - Position `2k+1`: distance 0.5 from center at `2k+0.5`, distance 1.5 from center at `2k+2.5`
435/// → weights: 0.75×pixel\[k\] + 0.25×pixel\[k+1\]
436pub(crate) fn upscale(
437 src: &mut Pixmap,
438 src_width: u16,
439 src_height: u16,
440 edge_mode: EdgeMode,
441) -> (u16, u16) {
442 let dst_width = src_width * 2;
443 let dst_height = src_height * 2;
444 upscale_x(src, src_width, src_height, edge_mode);
445 upscale_y(src, dst_width, src_height, edge_mode);
446 (dst_width, dst_height)
447}
448
449/// Horizontal upsampling pass using [0.75, 0.25] interpolation with logical dimensions.
450///
451/// Doubles the logical width using phase-aligned interpolation. Each input pixel
452/// generates two output pixels with different weights based on their distance from
453/// the downsampled pixel's center position.
454/// Operates in-place by processing backwards to avoid overwriting source data.
455fn upscale_x(src: &mut Pixmap, src_width: u16, src_height: u16, edge_mode: EdgeMode) {
456 // Process backwards (right to left) to avoid overwriting source data
457 for y in 0..src_height {
458 // Maintain sliding window of three pixels: prev, current, next
459 // This allows us to compute both output pixels that depend on current pixel x
460 let mut p0 = sample_x(src, src_width as i32 + 1, y, src_width, edge_mode);
461 let mut p1 = sample_x(src, src_width as i32, y, src_width, edge_mode);
462
463 for x in (0..src_width).rev() {
464 let src_x = x as i32;
465 let p2 = sample_x(src, src_x - 1, y, src_width, edge_mode);
466
467 // Generate two output pixels per input with phase-aligned interpolation:
468 // output[2x] = 0.25×p2 + 0.75×p1 (position 2x is 0.5 from center at 2x+0.5)
469 // output[2x+1] = 0.75×p1 + 0.25×p0 (position 2x+1 is 0.5 from center at 2x+0.5)
470 let dst_x = x * 2;
471 src.set_pixel(dst_x, y, interpolate_25_75(p2, p1));
472 src.set_pixel(dst_x + 1, y, interpolate_75_25(p1, p0));
473
474 // Advance sliding window for next iteration
475 p0 = p1;
476 p1 = p2;
477 }
478 }
479}
480
481/// Vertical upsampling pass using [0.75, 0.25] interpolation with logical dimensions.
482///
483/// Doubles the logical height using phase-aligned interpolation. Each input pixel
484/// generates two output pixels with different weights based on their distance from
485/// the downsampled pixel's center position.
486/// Operates in-place by processing backwards to avoid overwriting source data.
487fn upscale_y(src: &mut Pixmap, src_width: u16, src_height: u16, edge_mode: EdgeMode) {
488 // Process backwards (bottom to top) to avoid overwriting source data
489 for x in 0..src_width {
490 // Maintain sliding window of three pixels: prev, current, next
491 // This allows us to compute both output pixels that depend on current pixel y
492 let mut p0 = sample_y(src, x, src_height as i32 + 1, src_height, edge_mode);
493 let mut p1 = sample_y(src, x, src_height as i32, src_height, edge_mode);
494
495 for y in (0..src_height).rev() {
496 let src_y = y as i32;
497 let p2 = sample_y(src, x, src_y - 1, src_height, edge_mode);
498
499 // Generate two output rows per input with phase-aligned interpolation:
500 // output[2y] = 0.25×p2 + 0.75×p1 (position 2y is 0.5 from center at 2y+0.5)
501 // output[2y+1] = 0.75×p1 + 0.25×p0 (position 2y+1 is 0.5 from center at 2y+0.5)
502 let dst_y = y * 2;
503 src.set_pixel(x, dst_y, interpolate_25_75(p2, p1));
504 src.set_pixel(x, dst_y + 1, interpolate_75_25(p1, p0));
505
506 // Advance sliding window for next iteration
507 p0 = p1;
508 p1 = p2;
509 }
510 }
511}
512
513/// Transparent black pixel constant.
514const TRANSPARENT_BLACK: PremulRgba8 = PremulRgba8 {
515 r: 0,
516 g: 0,
517 b: 0,
518 a: 0,
519};
520
521/// Sample a pixel with edge mode handling for horizontal sampling.
522#[inline(always)]
523fn sample_x(src: &Pixmap, x: i32, y: u16, width: u16, edge_mode: EdgeMode) -> PremulRgba8 {
524 sample(x, width, edge_mode, |src_x| src.sample(src_x, y))
525}
526
527/// Sample a pixel with edge mode handling for vertical sampling.
528#[inline(always)]
529fn sample_y(src: &Pixmap, x: u16, y: i32, height: u16, edge_mode: EdgeMode) -> PremulRgba8 {
530 sample(y, height, edge_mode, |src_y| src.sample(x, src_y))
531}
532
533/// Sample a pixel with edge mode handling (generic implementation).
534///
535/// Handles both horizontal and vertical sampling based on the provided closure.
536/// The `sample_fn` closure receives the clamped/extended coordinate and returns the pixel.
537/// For `EdgeMode::None`, returns transparent black if the coordinate is out of bounds.
538#[inline(always)]
539fn sample<F>(coord: i32, size: u16, edge_mode: EdgeMode, sample_fn: F) -> PremulRgba8
540where
541 F: FnOnce(u16) -> PremulRgba8,
542{
543 // For EdgeMode::None, return transparent black if out of bounds
544 if edge_mode == EdgeMode::None && (coord < 0 || coord >= size as i32) {
545 return TRANSPARENT_BLACK;
546 }
547 let extended_coord = extend(coord, size, edge_mode);
548 sample_fn(extended_coord)
549}
550
551/// Extend a coordinate beyond image boundaries according to the edge mode.
552///
553/// Transforms out-of-bounds coordinates for sampling: clamped, wrapped, or mirrored
554/// depending on the mode. For `EdgeMode::None`, the coordinate is guaranteed to be
555/// in-bounds (already checked by caller, which returns transparent black for out-of-bounds).
556#[inline(always)]
557fn extend(coord: i32, size: u16, edge_mode: EdgeMode) -> u16 {
558 match edge_mode {
559 EdgeMode::Duplicate => {
560 // Clamp to image bounds: pixels outside use nearest edge pixel
561 coord.clamp(0, size as i32 - 1) as u16
562 }
563 EdgeMode::None => {
564 // Coordinate is already validated as in-bounds by caller
565 coord as u16
566 }
567 EdgeMode::Wrap => {
568 // Wrap around using modulo: image tiles infinitely
569 let mut c = coord % size as i32;
570 if c < 0 {
571 c += size as i32;
572 }
573 c as u16
574 }
575 EdgeMode::Mirror => {
576 // Mirror at boundaries: image reflects across edges
577 let period = size as i32 * 2;
578 let mut c = coord % period;
579 if c < 0 {
580 c += period;
581 }
582 if c >= size as i32 {
583 c = period - c - 1;
584 }
585 c as u16
586 }
587 }
588}
589
590/// Blend 4 RGBA pixels using \[1,3,3,1\]/8 binomial weights.
591///
592/// Computes `(p0 + 3×p1 + 3×p2 + p3) / 8` using efficient integer arithmetic (right shift).
593/// This binomial pattern approximates a Gaussian and contributes variance=0.75 to the blur.
594/// Adds 4 before the shift to implement round-to-nearest instead of floor division.
595#[inline(always)]
596fn decimate_weighted(
597 p0: PremulRgba8,
598 p1: PremulRgba8,
599 p2: PremulRgba8,
600 p3: PremulRgba8,
601) -> PremulRgba8 {
602 let r = ((p0.r as u32 + p1.r as u32 * 3 + p2.r as u32 * 3 + p3.r as u32 + 4) >> 3) as u8;
603 let g = ((p0.g as u32 + p1.g as u32 * 3 + p2.g as u32 * 3 + p3.g as u32 + 4) >> 3) as u8;
604 let b = ((p0.b as u32 + p1.b as u32 * 3 + p2.b as u32 * 3 + p3.b as u32 + 4) >> 3) as u8;
605 let a = ((p0.a as u32 + p1.a as u32 * 3 + p2.a as u32 * 3 + p3.a as u32 + 4) >> 3) as u8;
606 PremulRgba8 { r, g, b, a }
607}
608
609/// Blend 2 RGBA pixels using [0.25, 0.75] weights (right-weighted interpolation).
610///
611/// Used for upsampling to generate the second of two output pixels.
612/// Favors the right/next pixel (p1) with 75% weight.
613/// Adds 2 before the shift to implement round-to-nearest instead of floor division.
614#[inline(always)]
615fn interpolate_25_75(p0: PremulRgba8, p1: PremulRgba8) -> PremulRgba8 {
616 let r = ((p0.r as u32 + p1.r as u32 * 3 + 2) >> 2) as u8;
617 let g = ((p0.g as u32 + p1.g as u32 * 3 + 2) >> 2) as u8;
618 let b = ((p0.b as u32 + p1.b as u32 * 3 + 2) >> 2) as u8;
619 let a = ((p0.a as u32 + p1.a as u32 * 3 + 2) >> 2) as u8;
620 PremulRgba8 { r, g, b, a }
621}
622
623/// Blend 2 RGBA pixels using [0.75, 0.25] weights.
624///
625/// Computes `0.75×p0 + 0.25×p1` using efficient integer arithmetic.
626/// Used during upsampling for positions closer to the first pixel.
627/// Adds 2 before the shift to implement round-to-nearest instead of floor division.
628#[inline(always)]
629fn interpolate_75_25(p0: PremulRgba8, p1: PremulRgba8) -> PremulRgba8 {
630 let r = ((p0.r as u32 * 3 + p1.r as u32 + 2) >> 2) as u8;
631 let g = ((p0.g as u32 * 3 + p1.g as u32 + 2) >> 2) as u8;
632 let b = ((p0.b as u32 * 3 + p1.b as u32 + 2) >> 2) as u8;
633 let a = ((p0.a as u32 * 3 + p1.a as u32 + 2) >> 2) as u8;
634 PremulRgba8 { r, g, b, a }
635}
636
637#[cfg(test)]
638mod tests {
639 use super::*;
640
641 /// Test Gaussian kernel computation for small σ.
642 #[test]
643 fn test_gaussian_kernel_small_sigma() {
644 let (kernel, size) = compute_gaussian_kernel(1.0);
645 // For σ=1.0, radius = ceil(3.0) = 3, size = 2*3+1 = 7
646 assert_eq!(size, 7);
647
648 // Kernel should be symmetric
649 for i in 0..size / 2 {
650 assert!((kernel[i] - kernel[size - 1 - i]).abs() < 1e-6);
651 }
652
653 // Kernel should sum to 1.0 (normalized)
654 let sum: f32 = kernel.iter().take(size).sum();
655 assert!((sum - 1.0).abs() < 1e-6);
656
657 // Center should be the largest weight
658 let center_idx = size / 2;
659 for i in 0..size {
660 if i != center_idx {
661 assert!(kernel[center_idx] >= kernel[i]);
662 }
663 }
664 }
665
666 /// Test Gaussian kernel computation for very small σ (near-zero).
667 #[test]
668 fn test_gaussian_kernel_very_small_sigma() {
669 let (kernel, size) = compute_gaussian_kernel(0.1);
670 // For σ=0.1, radius = ceil(0.3) = 1, size = 3
671 assert_eq!(size, 3);
672 // Should sum to 1.0
673 let sum: f32 = kernel.iter().take(size).sum();
674 assert!((sum - 1.0).abs() < 1e-6);
675 // Center weight should be dominant for very small σ
676 assert!(kernel[1] > 0.9); // Center is highly weighted
677 }
678
679 /// Test Gaussian kernel for fractional σ.
680 #[test]
681 fn test_gaussian_kernel_fractional_sigma() {
682 let (kernel, size) = compute_gaussian_kernel(0.5);
683 // For σ=0.5, radius = ceil(1.5) = 2, size = 5
684 assert_eq!(size, 5);
685
686 // Should still sum to 1.0
687 let sum: f32 = kernel.iter().take(size).sum();
688 assert!((sum - 1.0).abs() < 1e-6);
689 }
690
691 /// Test decimation plan for small blur (no decimation).
692 #[test]
693 fn test_plan_no_decimation() {
694 let (n_decimations, _kernel, _size) = plan_decimated_blur(1.0);
695 // σ=1.0 → variance=1.0, should not decimate
696 assert_eq!(n_decimations, 0);
697 }
698
699 /// Test decimation plan for medium blur (some decimation).
700 #[test]
701 fn test_plan_with_decimation() {
702 let (n_decimations, _kernel, _size) = plan_decimated_blur(5.0);
703 // σ=5.0 → variance=25.0, should decimate
704 assert_eq!(n_decimations, 2);
705 }
706
707 /// Test decimation plan at boundary (σ=2.0).
708 #[test]
709 fn test_plan_decimation_boundary() {
710 let (n_decimations, _kernel, _size) = plan_decimated_blur(2.0);
711 // σ=2.0 → variance=4.0, right at the boundary
712 assert_eq!(n_decimations, 0);
713 }
714
715 /// Test decimation plan for negative σ (invalid, should return identity).
716 #[test]
717 fn test_plan_negative_sigma() {
718 let (n_decimations, kernel, size) = plan_decimated_blur(-1.0);
719 assert_eq!(n_decimations, 0);
720 assert_eq!(size, 1);
721 assert!((kernel[0] - 1.0).abs() < 1e-6);
722 }
723
724 /// Test edge extension with Duplicate mode.
725 #[test]
726 fn test_extend_duplicate() {
727 let size = 10;
728
729 // In-bounds coordinate
730 assert_eq!(extend(5, size, EdgeMode::Duplicate), 5);
731
732 // Below bounds: clamp to 0
733 assert_eq!(extend(-1, size, EdgeMode::Duplicate), 0);
734 assert_eq!(extend(-10, size, EdgeMode::Duplicate), 0);
735
736 // Above bounds: clamp to size-1
737 assert_eq!(extend(10, size, EdgeMode::Duplicate), 9);
738 assert_eq!(extend(20, size, EdgeMode::Duplicate), 9);
739 }
740
741 /// Test edge extension with Wrap mode.
742 #[test]
743 fn test_extend_wrap() {
744 let size = 10;
745
746 // In-bounds: identity
747 assert_eq!(extend(5, size, EdgeMode::Wrap), 5);
748
749 // Above bounds: wrap around
750 assert_eq!(extend(10, size, EdgeMode::Wrap), 0);
751 assert_eq!(extend(11, size, EdgeMode::Wrap), 1);
752 assert_eq!(extend(25, size, EdgeMode::Wrap), 5);
753
754 // Below bounds: wrap from end
755 assert_eq!(extend(-1, size, EdgeMode::Wrap), 9);
756 assert_eq!(extend(-2, size, EdgeMode::Wrap), 8);
757 }
758
759 /// Test edge extension with Mirror mode.
760 #[test]
761 fn test_extend_mirror() {
762 let size = 10;
763
764 // In-bounds: identity
765 assert_eq!(extend(5, size, EdgeMode::Mirror), 5);
766
767 // Just past boundary: mirror back
768 // For coord=10: period=20, c=10, c>=10 so c = 20-10-1 = 9
769 assert_eq!(extend(10, size, EdgeMode::Mirror), 9);
770 // For coord=11: period=20, c=11, c>=10 so c = 20-11-1 = 8
771 assert_eq!(extend(11, size, EdgeMode::Mirror), 8);
772
773 // Full reflection cycle (period = 2*size = 20)
774 // For coord=19: c=19, c>=10 so c = 20-19-1 = 0
775 assert_eq!(extend(19, size, EdgeMode::Mirror), 0);
776 // For coord=20: c=20%20=0, c<10 so c=0
777 assert_eq!(extend(20, size, EdgeMode::Mirror), 0);
778
779 // Negative coordinates
780 // For coord=-1: c=-1%20=-1, c<0 so c+=20 → c=19, c>=10 so c=20-19-1=0
781 assert_eq!(extend(-1, size, EdgeMode::Mirror), 0);
782 // For coord=-2: c=-2%20=-2, c<0 so c+=20 → c=18, c>=10 so c=20-18-1=1
783 assert_eq!(extend(-2, size, EdgeMode::Mirror), 1);
784 }
785
786 /// Test edge extension with None mode (coordinate should pass through).
787 #[test]
788 fn test_extend_none() {
789 let size = 10;
790 // None mode just passes the coordinate as u16
791 assert_eq!(extend(5, size, EdgeMode::None), 5);
792 assert_eq!(extend(9, size, EdgeMode::None), 9);
793 assert_eq!(extend(10, size, EdgeMode::None), 10);
794 assert_eq!(extend(11, size, EdgeMode::None), 11);
795 }
796
797 /// Test [1,3,3,1]/8 decimation weights.
798 #[test]
799 fn test_decimate_weighted() {
800 let p0 = PremulRgba8 {
801 r: 0,
802 g: 0,
803 b: 0,
804 a: 0,
805 };
806 let p1 = PremulRgba8 {
807 r: 8,
808 g: 8,
809 b: 8,
810 a: 8,
811 };
812 let p2 = PremulRgba8 {
813 r: 8,
814 g: 8,
815 b: 8,
816 a: 8,
817 };
818 let p3 = PremulRgba8 {
819 r: 0,
820 g: 0,
821 b: 0,
822 a: 0,
823 };
824
825 // (0 + 3*8 + 3*8 + 0 + 4) / 8 = (48 + 4) / 8 = 52 / 8 = 6.5 → 6 (rounds down)
826 let result = decimate_weighted(p0, p1, p2, p3);
827 assert_eq!(result.r, 6);
828 assert_eq!(result.g, 6);
829 assert_eq!(result.b, 6);
830 assert_eq!(result.a, 6);
831 }
832
833 /// Test [1,3,3,1]/8 with all same values (should be identity).
834 #[test]
835 fn test_decimate_weighted_uniform() {
836 let p = PremulRgba8 {
837 r: 100,
838 g: 150,
839 b: 200,
840 a: 255,
841 };
842 let result = decimate_weighted(p, p, p, p);
843 // All same: (100 + 300 + 300 + 100 + 4) / 8 = 804/8 = 100.5 → 100 (rounds down)
844 assert_eq!(result.r, 100);
845 assert_eq!(result.g, 150);
846 assert_eq!(result.b, 200);
847 assert_eq!(result.a, 255);
848 }
849
850 /// Test [0.25, 0.75] interpolation.
851 #[test]
852 fn test_interpolate_25_75() {
853 let p0 = PremulRgba8 {
854 r: 0,
855 g: 0,
856 b: 0,
857 a: 0,
858 };
859 let p1 = PremulRgba8 {
860 r: 100,
861 g: 100,
862 b: 100,
863 a: 100,
864 };
865
866 // (0 + 300 + 2) / 4 = 302 / 4 = 75.5 → 75 (rounds down)
867 let result = interpolate_25_75(p0, p1);
868 assert_eq!(result.r, 75);
869 assert_eq!(result.g, 75);
870 assert_eq!(result.b, 75);
871 assert_eq!(result.a, 75);
872 }
873
874 /// Test [0.75, 0.25] interpolation.
875 #[test]
876 fn test_interpolate_75_25() {
877 let p0 = PremulRgba8 {
878 r: 100,
879 g: 100,
880 b: 100,
881 a: 100,
882 };
883 let p1 = PremulRgba8 {
884 r: 0,
885 g: 0,
886 b: 0,
887 a: 0,
888 };
889
890 // (300 + 0 + 2) / 4 = 302 / 4 = 75.5 → 75 (rounds down)
891 let result = interpolate_75_25(p0, p1);
892 assert_eq!(result.r, 75);
893 assert_eq!(result.g, 75);
894 assert_eq!(result.b, 75);
895 assert_eq!(result.a, 75);
896 }
897
898 /// Test interpolation symmetry.
899 #[test]
900 fn test_interpolation_symmetry() {
901 let p0 = PremulRgba8 {
902 r: 50,
903 g: 100,
904 b: 150,
905 a: 200,
906 };
907 let p1 = PremulRgba8 {
908 r: 200,
909 g: 150,
910 b: 100,
911 a: 50,
912 };
913
914 let r1 = interpolate_25_75(p0, p1);
915 let r2 = interpolate_75_25(p1, p0);
916
917 // Should be symmetric
918 assert!((r1.r as i32 - r2.r as i32).abs() <= 0);
919 assert!((r1.g as i32 - r2.g as i32).abs() <= 0);
920 assert!((r1.b as i32 - r2.b as i32).abs() <= 0);
921 assert!((r1.a as i32 - r2.a as i32).abs() <= 0);
922 }
923
924 /// Test that very small image sizes don't panic.
925 #[test]
926 fn test_small_image_sizes() {
927 let mut pixmap = Pixmap::new(1, 1);
928 let (n_decimations, kernel, kernel_size) = plan_decimated_blur(2.0);
929
930 // Should not panic
931 let result = std::panic::catch_unwind(move || {
932 let mut scratch = Pixmap::new(1, 1);
933 apply_blur(
934 &mut pixmap,
935 &mut scratch,
936 n_decimations,
937 &kernel[..kernel_size],
938 EdgeMode::None,
939 );
940 });
941
942 assert!(result.is_ok());
943 }
944
945 /// Test downscale with odd dimensions.
946 #[test]
947 fn test_downscale_odd_dimensions() {
948 let mut pixmap = Pixmap::new(5, 5);
949 // Fill with white
950 for y in 0..5 {
951 for x in 0..5 {
952 pixmap.set_pixel(
953 x,
954 y,
955 PremulRgba8 {
956 r: 255,
957 g: 255,
958 b: 255,
959 a: 255,
960 },
961 );
962 }
963 }
964
965 let (new_width, new_height) = downscale(&mut pixmap, 5, 5, EdgeMode::Duplicate);
966 // 5 / 2 = 2.5 → ceil = 3
967 assert_eq!(new_width, 3);
968 assert_eq!(new_height, 3);
969 }
970
971 /// Test upscale dimensions.
972 #[test]
973 fn test_upscale_dimensions() {
974 let mut pixmap = Pixmap::new(6, 6);
975 let (new_width, new_height) = upscale(&mut pixmap, 3, 3, EdgeMode::Duplicate);
976 // 3 * 2 = 6
977 assert_eq!(new_width, 6);
978 assert_eq!(new_height, 6);
979 }
980
981 /// Test that horizontal convolution preserves uniform colors.
982 #[test]
983 fn test_convolve_x_uniform() {
984 let mut src = Pixmap::new(5, 3);
985 let mut dst = Pixmap::new(5, 3);
986 // Fill with uniform gray
987 src.data_mut().fill(PremulRgba8 {
988 r: 128,
989 g: 128,
990 b: 128,
991 a: 255,
992 });
993
994 let (kernel, kernel_size) = compute_gaussian_kernel(1.0);
995 convolve_x(
996 &src,
997 &mut dst,
998 5,
999 3,
1000 &kernel[..kernel_size],
1001 kernel_size / 2,
1002 EdgeMode::Duplicate,
1003 );
1004
1005 // Uniform input should produce uniform output
1006 for y in 0..3 {
1007 for x in 0..5 {
1008 let pixel = dst.sample(x, y);
1009 assert_eq!(
1010 pixel,
1011 PremulRgba8 {
1012 r: 128,
1013 g: 128,
1014 b: 128,
1015 a: 255,
1016 }
1017 );
1018 }
1019 }
1020 }
1021
1022 /// Test that vertical convolution preserves uniform colors.
1023 #[test]
1024 fn test_convolve_y_uniform() {
1025 let mut src = Pixmap::new(3, 5);
1026 let mut dst = Pixmap::new(3, 5);
1027 // Fill with uniform gray
1028 src.data_mut().fill(PremulRgba8 {
1029 r: 128,
1030 g: 128,
1031 b: 128,
1032 a: 255,
1033 });
1034
1035 let (kernel, kernel_size) = compute_gaussian_kernel(1.0);
1036 convolve_y(
1037 &src,
1038 &mut dst,
1039 3,
1040 5,
1041 &kernel[..kernel_size],
1042 kernel_size / 2,
1043 EdgeMode::Duplicate,
1044 );
1045
1046 // Uniform input should produce uniform output
1047 for y in 0..5 {
1048 for x in 0..3 {
1049 let pixel = dst.sample(x, y);
1050 assert_eq!(
1051 pixel,
1052 PremulRgba8 {
1053 r: 128,
1054 g: 128,
1055 b: 128,
1056 a: 255,
1057 }
1058 );
1059 }
1060 }
1061 }
1062
1063 /// Test that convolution with identity kernel is a no-op.
1064 #[test]
1065 fn test_convolve_identity_kernel() {
1066 let mut src = Pixmap::new(3, 3);
1067 let mut dst = Pixmap::new(3, 3);
1068 src.set_pixel(
1069 1,
1070 1,
1071 PremulRgba8 {
1072 r: 255,
1073 g: 100,
1074 b: 50,
1075 a: 200,
1076 },
1077 );
1078
1079 let kernel = [1.0]; // Identity kernel
1080 convolve_x(&src, &mut dst, 3, 3, &kernel, 0, EdgeMode::None);
1081
1082 // Should be unchanged
1083 assert_eq!(dst.sample(1, 1), src.sample(1, 1));
1084 }
1085
1086 /// Test that large sigma values are clamped to `MAX_KERNEL_SIZE`.
1087 #[test]
1088 fn test_large_sigma_clamped_to_max() {
1089 let (kernel, kernel_size) = compute_gaussian_kernel(100.0);
1090 // For σ=100, radius = ceil(300) = 300, size would be 601
1091 // But it should be clamped to MAX_KERNEL_SIZE
1092 assert_eq!(kernel_size, MAX_KERNEL_SIZE);
1093
1094 // The clamped kernel should still sum to 1.0 (normalized)
1095 let sum: f32 = kernel.iter().take(kernel_size).sum();
1096 assert!((sum - 1.0).abs() < 1e-6);
1097 }
1098
1099 /// Test that decimation prevents kernel from exceeding `MAX_KERNEL_SIZE`.
1100 #[test]
1101 fn test_decimation_reduces_kernel_size() {
1102 let (n_decimations, _kernel, kernel_size) = plan_decimated_blur(100.0);
1103 assert_eq!(kernel_size, 9);
1104 assert_eq!(n_decimations, 6, "Large sigma should trigger decimation");
1105 }
1106
1107 /// Test sampling behavior at exact boundaries for each edge mode.
1108 #[test]
1109 fn test_sample_x_at_boundaries() {
1110 let mut pixmap = Pixmap::new(3, 1);
1111 pixmap.set_pixel(
1112 0,
1113 0,
1114 PremulRgba8 {
1115 r: 10,
1116 g: 0,
1117 b: 0,
1118 a: 255,
1119 },
1120 );
1121 pixmap.set_pixel(
1122 1,
1123 0,
1124 PremulRgba8 {
1125 r: 20,
1126 g: 0,
1127 b: 0,
1128 a: 255,
1129 },
1130 );
1131 pixmap.set_pixel(
1132 2,
1133 0,
1134 PremulRgba8 {
1135 r: 30,
1136 g: 0,
1137 b: 0,
1138 a: 255,
1139 },
1140 );
1141
1142 // Test left boundary with Duplicate
1143 let mut p = sample_x(&pixmap, -1, 0, 3, EdgeMode::Duplicate);
1144 assert_eq!(p.r, 10); // Should clamp to first pixel
1145
1146 // Test right boundary with Duplicate
1147 p = sample_x(&pixmap, 3, 0, 3, EdgeMode::Duplicate);
1148 assert_eq!(p.r, 30); // Should clamp to last pixel
1149
1150 // Test with None mode (should return transparent black)
1151 p = sample_x(&pixmap, -1, 0, 3, EdgeMode::None);
1152 assert_eq!(p.a, 0);
1153 }
1154
1155 /// Test sampling behavior at exact boundaries for vertical sampling.
1156 #[test]
1157 fn test_sample_y_at_boundaries() {
1158 let mut pixmap = Pixmap::new(1, 3);
1159 pixmap.set_pixel(
1160 0,
1161 0,
1162 PremulRgba8 {
1163 r: 10,
1164 g: 0,
1165 b: 0,
1166 a: 255,
1167 },
1168 );
1169 pixmap.set_pixel(
1170 0,
1171 1,
1172 PremulRgba8 {
1173 r: 20,
1174 g: 0,
1175 b: 0,
1176 a: 255,
1177 },
1178 );
1179 pixmap.set_pixel(
1180 0,
1181 2,
1182 PremulRgba8 {
1183 r: 30,
1184 g: 0,
1185 b: 0,
1186 a: 255,
1187 },
1188 );
1189
1190 // Test top boundary with Duplicate
1191 let mut p = sample_y(&pixmap, 0, -1, 3, EdgeMode::Duplicate);
1192 assert_eq!(p.r, 10); // Should clamp to first pixel
1193
1194 // Test bottom boundary with Duplicate
1195 p = sample_y(&pixmap, 0, 3, 3, EdgeMode::Duplicate);
1196 assert_eq!(p.r, 30); // Should clamp to last pixel
1197
1198 // Test with None mode (should return transparent black)
1199 p = sample_y(&pixmap, 0, -1, 3, EdgeMode::None);
1200 assert_eq!(p.a, 0);
1201 }
1202
1203 /// Test that kernel normalization is precise for various sigma values.
1204 #[test]
1205 fn test_kernel_normalization_precision() {
1206 for sigma in [0.1, 0.5, 1.0, 2.0, 5.0, 10.0] {
1207 let (kernel, kernel_size) = compute_gaussian_kernel(sigma);
1208 let sum: f32 = kernel.iter().take(kernel_size).sum();
1209 assert!(
1210 (sum - 1.0).abs() < 1e-6,
1211 "Kernel for σ={} not normalized: sum={}",
1212 sigma,
1213 sum
1214 );
1215 }
1216 }
1217
1218 /// Test that downscale → upscale preserves dimensions.
1219 #[test]
1220 fn test_downscale_upscale_roundtrip() {
1221 let mut pixmap = Pixmap::new(8, 8);
1222 // Fill with a pattern
1223 pixmap.data_mut().fill(PremulRgba8 {
1224 r: 128,
1225 g: 128,
1226 b: 128,
1227 a: 255,
1228 });
1229
1230 let (w1, h1) = downscale(&mut pixmap, 8, 8, EdgeMode::Duplicate);
1231 assert_eq!(w1, 4);
1232 assert_eq!(h1, 4);
1233
1234 let (w2, h2) = upscale(&mut pixmap, w1, h1, EdgeMode::Duplicate);
1235 assert_eq!(w2, 8);
1236 assert_eq!(h2, 8);
1237 }
1238}