image_webp/
yuv.rs

//! Utilities for doing the YUV -> RGB conversion.
//!
//! The images are encoded in the Y'CbCr format as detailed here: <https://en.wikipedia.org/wiki/YCbCr>,
//! so they need to be converted to RGB to be displayed.
//! To do the YUV -> RGB conversion we first need to decide how to map the yuv values to the pixels.
//! The y buffer is the same size as the pixel buffer so that maps 1-1, but the
//! u and v buffers have half the width and half the height of the pixel buffer, so we need to
//! scale them up.
//! The simple way to upscale is just to take each u/v value and associate it with the 4
//! pixels around it, e.g. for a 4x4 image:
//!
//! ```text
//! ||||||
//! |yyyy|
//! |yyyy|
//! |yyyy|
//! |yyyy|
//! ||||||
//!
//! |||||||
//! |uu|vv|
//! |uu|vv|
//! |||||||
//! ```
//!
//! Then each of the 2x2 pixels would match the u/v from the same quadrant.
//!
//! However, fancy upsampling is the default for libwebp, which does a little more work to make
//! the values smoother.
//! It interpolates u and v so that, e.g. for the pixel 1 down and 1 from the left, the u value
//! would be (9*u0 + 3*u1 + 3*u2 + u3 + 8) / 16, and similarly for the other pixels.
//! The edges are mirrored, so for the pixel 1 down and 0 from the left it uses
//! (9*u0 + 3*u2 + 3*u0 + u2 + 8) / 16.
28
/// `_mm_mulhi_epu16` emulation for an 8-bit sample.
///
/// Multiplies `v` by the 16-bit fixed-point coefficient `coeff` and discards the low 8 bits of
/// the product.
fn mulhi(v: u8, coeff: u16) -> i32 {
    // The largest possible product is 255 * 65535, which fits in an i32, so both widening
    // conversions are lossless and no truncating cast is needed.
    (i32::from(v) * i32::from(coeff)) >> 8
}
33
/// Converts a fixed-point colour value into an 8-bit channel, saturating at 0 and 255.
///
/// The input carries `YUV_FIX2` (6) fractional bits which are shifted away before clamping.
/// This function has been rewritten to encourage auto-vectorization.
///
/// Based on [src/dsp/yuv.h](https://github.com/webmproject/libwebp/blob/8534f53960befac04c9631e6e50d21dcb42dfeaf/src/dsp/yuv.h#L79)
/// from the libwebp source.
/// ```text
/// const YUV_FIX2: i32 = 6;
/// const YUV_MASK2: i32 = (256 << YUV_FIX2) - 1;
/// fn clip(v: i32) -> u8 {
///     if (v & !YUV_MASK2) == 0 {
///         (v >> YUV_FIX2) as u8
///     } else if v < 0 {
///         0
///     } else {
///         255
///     }
/// }
/// ```
// Clippy suggests the clamp method, but it seems to optimize worse as of rustc 1.82.0 nightly.
#[allow(clippy::manual_clamp)]
fn clip(v: i32) -> u8 {
    const YUV_FIX2: i32 = 6;
    // After the clamp the value is within 0..=255, so the narrowing cast cannot truncate.
    (v >> YUV_FIX2).max(0).min(255) as u8
}
57
58#[inline(always)]
59fn yuv_to_r(y: u8, v: u8) -> u8 {
60    clip(mulhi(y, 19077) + mulhi(v, 26149) - 14234)
61}
62
63#[inline(always)]
64fn yuv_to_g(y: u8, u: u8, v: u8) -> u8 {
65    clip(mulhi(y, 19077) - mulhi(u, 6419) - mulhi(v, 13320) + 8708)
66}
67
68#[inline(always)]
69fn yuv_to_b(y: u8, u: u8) -> u8 {
70    clip(mulhi(y, 19077) + mulhi(u, 33050) - 17685)
71}
72
73/// Fills an rgb buffer with the image from the yuv buffers
74/// Size of the buffer is assumed to be correct
75/// BPP is short for bytes per pixel, allows both rgb and rgba to be decoded
76pub(crate) fn fill_rgb_buffer_fancy<const BPP: usize>(
77    buffer: &mut [u8],
78    y_buffer: &[u8],
79    u_buffer: &[u8],
80    v_buffer: &[u8],
81    width: usize,
82    height: usize,
83    buffer_width: usize,
84) {
85    // buffer width is always even so don't need to do div_ceil
86    let chroma_buffer_width = buffer_width / 2;
87    let chroma_width = width.div_ceil(2);
88
89    // fill top row first since it only uses the top u/v row
90    let top_row_y = &y_buffer[..width];
91    let top_row_u = &u_buffer[..chroma_width];
92    let top_row_v = &v_buffer[..chroma_width];
93    let top_row_buffer = &mut buffer[..width * BPP];
94    fill_row_fancy_with_1_uv_row::<BPP>(top_row_buffer, top_row_y, top_row_u, top_row_v);
95
96    let mut main_row_chunks = buffer[width * BPP..].chunks_exact_mut(width * BPP * 2);
97    // the y buffer iterator limits the end of the row iterator so we need this end index
98    let end_y_index = height * buffer_width;
99    let mut main_y_chunks = y_buffer[buffer_width..end_y_index].chunks_exact(buffer_width * 2);
100    let mut main_u_windows = u_buffer
101        .windows(chroma_buffer_width * 2)
102        .step_by(chroma_buffer_width);
103    let mut main_v_windows = v_buffer
104        .windows(chroma_buffer_width * 2)
105        .step_by(chroma_buffer_width);
106
107    for (((row_buffer, y_rows), u_rows), v_rows) in (&mut main_row_chunks)
108        .zip(&mut main_y_chunks)
109        .zip(&mut main_u_windows)
110        .zip(&mut main_v_windows)
111    {
112        let (u_row_1, u_row_2) = u_rows.split_at(chroma_buffer_width);
113        let (v_row_1, v_row_2) = v_rows.split_at(chroma_buffer_width);
114        let (row_buf_1, row_buf_2) = row_buffer.split_at_mut(width * BPP);
115        let (y_row_1, y_row_2) = y_rows.split_at(buffer_width);
116        fill_row_fancy_with_2_uv_rows::<BPP>(
117            row_buf_1,
118            &y_row_1[..width],
119            &u_row_1[..chroma_width],
120            &u_row_2[..chroma_width],
121            &v_row_1[..chroma_width],
122            &v_row_2[..chroma_width],
123        );
124        fill_row_fancy_with_2_uv_rows::<BPP>(
125            row_buf_2,
126            &y_row_2[..width],
127            &u_row_2[..chroma_width],
128            &u_row_1[..chroma_width],
129            &v_row_2[..chroma_width],
130            &v_row_1[..chroma_width],
131        );
132    }
133
134    let final_row_buffer = main_row_chunks.into_remainder();
135
136    // if the image has even height there will be one final row with only one u/v row matching it
137    if !final_row_buffer.is_empty() {
138        let final_y_row = main_y_chunks.remainder();
139
140        let chroma_height = height.div_ceil(2);
141        let start_chroma_index = (chroma_height - 1) * chroma_buffer_width;
142
143        let final_u_row = &u_buffer[start_chroma_index..];
144        let final_v_row = &v_buffer[start_chroma_index..];
145        fill_row_fancy_with_1_uv_row::<BPP>(
146            final_row_buffer,
147            &final_y_row[..width],
148            &final_u_row[..chroma_width],
149            &final_v_row[..chroma_width],
150        );
151    }
152}
153
154/// Fills a row with the fancy interpolation as detailed
155fn fill_row_fancy_with_2_uv_rows<const BPP: usize>(
156    row_buffer: &mut [u8],
157    y_row: &[u8],
158    u_row_1: &[u8],
159    u_row_2: &[u8],
160    v_row_1: &[u8],
161    v_row_2: &[u8],
162) {
163    // need to do left pixel separately since it will only have one u/v value
164    {
165        let rgb1 = &mut row_buffer[0..3];
166        let y_value = y_row[0];
167        // first pixel uses the first u/v as the main one
168        let u_value = get_fancy_chroma_value(u_row_1[0], u_row_1[0], u_row_2[0], u_row_2[0]);
169        let v_value = get_fancy_chroma_value(v_row_1[0], v_row_1[0], v_row_2[0], v_row_2[0]);
170        set_pixel(rgb1, y_value, u_value, v_value);
171    }
172
173    let rest_row_buffer = &mut row_buffer[BPP..];
174    let rest_y_row = &y_row[1..];
175
176    // we do two pixels at a time since they share the same u/v values
177    let mut main_row_chunks = rest_row_buffer.chunks_exact_mut(BPP * 2);
178    let mut main_y_chunks = rest_y_row.chunks_exact(2);
179
180    for (((((rgb, y_val), u_val_1), u_val_2), v_val_1), v_val_2) in (&mut main_row_chunks)
181        .zip(&mut main_y_chunks)
182        .zip(u_row_1.windows(2))
183        .zip(u_row_2.windows(2))
184        .zip(v_row_1.windows(2))
185        .zip(v_row_2.windows(2))
186    {
187        {
188            let rgb1 = &mut rgb[0..3];
189            let y_value = y_val[0];
190            // first pixel uses the first u/v as the main one
191            let u_value = get_fancy_chroma_value(u_val_1[0], u_val_1[1], u_val_2[0], u_val_2[1]);
192            let v_value = get_fancy_chroma_value(v_val_1[0], v_val_1[1], v_val_2[0], v_val_2[1]);
193            set_pixel(rgb1, y_value, u_value, v_value);
194        }
195        {
196            let rgb2 = &mut rgb[BPP..];
197            let y_value = y_val[1];
198            let u_value = get_fancy_chroma_value(u_val_1[1], u_val_1[0], u_val_2[1], u_val_2[0]);
199            let v_value = get_fancy_chroma_value(v_val_1[1], v_val_1[0], v_val_2[1], v_val_2[0]);
200            set_pixel(rgb2, y_value, u_value, v_value);
201        }
202    }
203
204    let final_pixel = main_row_chunks.into_remainder();
205    let final_y = main_y_chunks.remainder();
206
207    if let (rgb, [y_value]) = (final_pixel, final_y) {
208        let final_u_1 = *u_row_1.last().unwrap();
209        let final_u_2 = *u_row_2.last().unwrap();
210
211        let final_v_1 = *v_row_1.last().unwrap();
212        let final_v_2 = *v_row_2.last().unwrap();
213
214        let rgb1 = &mut rgb[0..3];
215        // first pixel uses the first u/v as the main one
216        let u_value = get_fancy_chroma_value(final_u_1, final_u_1, final_u_2, final_u_2);
217        let v_value = get_fancy_chroma_value(final_v_1, final_v_1, final_v_2, final_v_2);
218        set_pixel(rgb1, *y_value, u_value, v_value);
219    }
220}
221
222fn fill_row_fancy_with_1_uv_row<const BPP: usize>(
223    row_buffer: &mut [u8],
224    y_row: &[u8],
225    u_row: &[u8],
226    v_row: &[u8],
227) {
228    // doing left pixel first
229    {
230        let rgb1 = &mut row_buffer[0..3];
231        let y_value = y_row[0];
232
233        let u_value = u_row[0];
234        let v_value = v_row[0];
235        set_pixel(rgb1, y_value, u_value, v_value);
236    }
237
238    // two pixels at a time since they share the same u/v value
239    let mut main_row_chunks = row_buffer[BPP..].chunks_exact_mut(BPP * 2);
240    let mut main_y_row_chunks = y_row[1..].chunks_exact(2);
241
242    for (((rgb, y_val), u_val), v_val) in (&mut main_row_chunks)
243        .zip(&mut main_y_row_chunks)
244        .zip(u_row.windows(2))
245        .zip(v_row.windows(2))
246    {
247        {
248            let rgb1 = &mut rgb[0..3];
249            let y_value = y_val[0];
250            // first pixel uses the first u/v as the main one
251            let u_value = get_fancy_chroma_value(u_val[0], u_val[1], u_val[0], u_val[1]);
252            let v_value = get_fancy_chroma_value(v_val[0], v_val[1], v_val[0], v_val[1]);
253            set_pixel(rgb1, y_value, u_value, v_value);
254        }
255        {
256            let rgb2 = &mut rgb[BPP..];
257            let y_value = y_val[1];
258            let u_value = get_fancy_chroma_value(u_val[1], u_val[0], u_val[1], u_val[0]);
259            let v_value = get_fancy_chroma_value(v_val[1], v_val[0], v_val[1], v_val[0]);
260            set_pixel(rgb2, y_value, u_value, v_value);
261        }
262    }
263
264    let final_pixel = main_row_chunks.into_remainder();
265    let final_y = main_y_row_chunks.remainder();
266
267    if let (rgb, [final_y]) = (final_pixel, final_y) {
268        let final_u = *u_row.last().unwrap();
269        let final_v = *v_row.last().unwrap();
270
271        set_pixel(rgb, *final_y, final_u, final_v);
272    }
273}
274
/// Blends four neighbouring chroma samples with the weights 9:3:3:1, rounding half up.
///
/// `main` is the sample weighted 9, `secondary1` and `secondary2` are each weighted 3 and
/// `tertiary` is weighted 1.
#[inline]
fn get_fancy_chroma_value(main: u8, secondary1: u8, secondary2: u8, tertiary: u8) -> u8 {
    let val0 = u16::from(main);
    let val1 = u16::from(secondary1);
    let val2 = u16::from(secondary2);
    let val3 = u16::from(tertiary);
    // The weights sum to 16, so the quotient is at most 255 and the cast cannot truncate.
    ((9 * val0 + 3 * val1 + 3 * val2 + val3 + 8) / 16) as u8
}
283
284#[inline]
285fn set_pixel(rgb: &mut [u8], y: u8, u: u8, v: u8) {
286    rgb[0] = yuv_to_r(y, v);
287    rgb[1] = yuv_to_g(y, u, v);
288    rgb[2] = yuv_to_b(y, u);
289}
290
291/// Simple conversion, not currently used but could add a config to allow for using the simple
292#[allow(unused)]
293pub(crate) fn fill_rgb_buffer_simple<const BPP: usize>(
294    buffer: &mut [u8],
295    y_buffer: &[u8],
296    u_buffer: &[u8],
297    v_buffer: &[u8],
298    width: usize,
299    chroma_width: usize,
300    buffer_width: usize,
301) {
302    let u_row_twice_iter = u_buffer
303        .chunks_exact(buffer_width / 2)
304        .flat_map(|n| std::iter::repeat(n).take(2));
305    let v_row_twice_iter = v_buffer
306        .chunks_exact(buffer_width / 2)
307        .flat_map(|n| std::iter::repeat(n).take(2));
308
309    for (((row, y_row), u_row), v_row) in buffer
310        .chunks_exact_mut(width * BPP)
311        .zip(y_buffer.chunks_exact(buffer_width))
312        .zip(u_row_twice_iter)
313        .zip(v_row_twice_iter)
314    {
315        fill_rgba_row_simple::<BPP>(
316            &y_row[..width],
317            &u_row[..chroma_width],
318            &v_row[..chroma_width],
319            row,
320        );
321    }
322}
323
324fn fill_rgba_row_simple<const BPP: usize>(
325    y_vec: &[u8],
326    u_vec: &[u8],
327    v_vec: &[u8],
328    rgba: &mut [u8],
329) {
330    // Fill 2 pixels per iteration: these pixels share `u` and `v` components
331    let mut rgb_chunks = rgba.chunks_exact_mut(BPP * 2);
332    let mut y_chunks = y_vec.chunks_exact(2);
333    let mut u_iter = u_vec.iter();
334    let mut v_iter = v_vec.iter();
335
336    for (((rgb, y), &u), &v) in (&mut rgb_chunks)
337        .zip(&mut y_chunks)
338        .zip(&mut u_iter)
339        .zip(&mut v_iter)
340    {
341        let coeffs = [
342            mulhi(v, 26149),
343            mulhi(u, 6419),
344            mulhi(v, 13320),
345            mulhi(u, 33050),
346        ];
347
348        let get_r = |y: u8| clip(mulhi(y, 19077) + coeffs[0] - 14234);
349        let get_g = |y: u8| clip(mulhi(y, 19077) - coeffs[1] - coeffs[2] + 8708);
350        let get_b = |y: u8| clip(mulhi(y, 19077) + coeffs[3] - 17685);
351
352        let rgb1 = &mut rgb[0..3];
353        rgb1[0] = get_r(y[0]);
354        rgb1[1] = get_g(y[0]);
355        rgb1[2] = get_b(y[0]);
356
357        let rgb2 = &mut rgb[BPP..];
358        rgb2[0] = get_r(y[1]);
359        rgb2[1] = get_g(y[1]);
360        rgb2[2] = get_b(y[1]);
361    }
362
363    let remainder = rgb_chunks.into_remainder();
364    if remainder.len() >= 3 {
365        if let (Some(&y), Some(&u), Some(&v)) = (
366            y_chunks.remainder().iter().next(),
367            u_iter.next(),
368            v_iter.next(),
369        ) {
370            let coeffs = [
371                mulhi(v, 26149),
372                mulhi(u, 6419),
373                mulhi(v, 13320),
374                mulhi(u, 33050),
375            ];
376
377            remainder[0] = clip(mulhi(y, 19077) + coeffs[0] - 14234);
378            remainder[1] = clip(mulhi(y, 19077) - coeffs[1] - coeffs[2] + 8708);
379            remainder[2] = clip(mulhi(y, 19077) + coeffs[3] - 17685);
380        }
381    }
382}
383
#[cfg(test)]
mod tests {
    use super::*;

    /// Luma plane of the 4x4 test image.
    #[rustfmt::skip]
    const Y_BUFFER: [u8; 16] = [
        77, 162, 202, 185,
        28, 13, 199, 182,
        135, 147, 164, 135,
        66, 27, 171, 130,
    ];

    /// 2x2 u plane of the test image.
    #[rustfmt::skip]
    const U_BUFFER: [u8; 4] = [
        34, 101,
        123, 163,
    ];

    /// 2x2 v plane of the test image.
    #[rustfmt::skip]
    const V_BUFFER: [u8; 4] = [
        97, 167,
        149, 23,
    ];

    #[test]
    fn test_fancy_grid() {
        let mut rgb_buffer = [0u8; 16 * 3];
        fill_rgb_buffer_fancy::<3>(&mut rgb_buffer, &Y_BUFFER, &U_BUFFER, &V_BUFFER, 4, 4, 4);

        // chroma planes upsampled by hand with the 9:3:3:1 weights and mirrored edges
        #[rustfmt::skip]
        let upsampled_u_buffer = [
            34, 51, 84, 101,
            56, 71, 101, 117,
            101, 112, 136, 148,
            123, 133, 153, 163,
        ];

        #[rustfmt::skip]
        let upsampled_v_buffer = [
            97, 115, 150, 167,
            110, 115, 126, 131,
            136, 117, 78, 59,
            149, 118, 55, 23,
        ];

        let mut upsampled_rgb_buffer = [0u8; 16 * 3];
        for (((rgb_val, y), u), v) in upsampled_rgb_buffer
            .chunks_exact_mut(3)
            .zip(Y_BUFFER)
            .zip(upsampled_u_buffer)
            .zip(upsampled_v_buffer)
        {
            rgb_val[0] = yuv_to_r(y, v);
            rgb_val[1] = yuv_to_g(y, u, v);
            rgb_val[2] = yuv_to_b(y, u);
        }

        assert_eq!(rgb_buffer, upsampled_rgb_buffer);
    }

    #[test]
    fn test_fancy_rgba_leaves_alpha_untouched() {
        const ALPHA: u8 = 0xAA;

        let mut rgb_buffer = [0u8; 16 * 3];
        fill_rgb_buffer_fancy::<3>(&mut rgb_buffer, &Y_BUFFER, &U_BUFFER, &V_BUFFER, 4, 4, 4);

        let mut rgba_buffer = [ALPHA; 16 * 4];
        fill_rgb_buffer_fancy::<4>(&mut rgba_buffer, &Y_BUFFER, &U_BUFFER, &V_BUFFER, 4, 4, 4);

        for (rgba, rgb) in rgba_buffer.chunks_exact(4).zip(rgb_buffer.chunks_exact(3)) {
            // colour channels match the 3-byte conversion, the fourth byte is never written
            assert_eq!(&rgba[..3], rgb);
            assert_eq!(rgba[3], ALPHA);
        }
    }

    #[test]
    fn test_yuv_conversions() {
        let (y, u, v) = (203, 40, 42);

        assert_eq!(yuv_to_r(y, v), 80);
        assert_eq!(yuv_to_g(y, u, v), 255);
        assert_eq!(yuv_to_b(y, u), 40);
    }
}