image_webp/
lossless_transform.rs

1use std::ops::Range;
2
3use crate::decoder::DecodingError;
4
5use super::lossless::subsample_size;
6
/// One transform of the lossless bitstream together with the data required to undo it.
///
/// Each data-carrying variant mirrors the parameters of the matching `apply_*` function
/// in this module.
#[derive(Clone, Debug)]
pub(crate) enum TransformType {
    /// Spatial prediction; undone by `apply_predictor_transform`.
    PredictorTransform {
        /// log2 of the side length, in pixels, of the blocks that share one predictor mode.
        size_bits: u8,
        /// Four bytes per block; the predictor mode is read from the second byte.
        predictor_data: Vec<u8>,
    },
    /// Cross-channel decorrelation; undone by `apply_color_transform`.
    ColorTransform {
        /// log2 of the side length, in pixels, of the blocks that share one set of coefficients.
        size_bits: u8,
        /// Four bytes per block; the first three are the transform coefficients.
        transform_data: Vec<u8>,
    },
    /// Green subtracted from red and blue; undone by `apply_subtract_green_transform`.
    SubtractGreen,
    /// Palette lookup; undone by `apply_color_indexing_transform`.
    ColorIndexingTransform {
        /// Number of colors in the palette.
        table_size: u16,
        /// Palette entries, four bytes per color.
        table_data: Vec<u8>,
    },
}
23
24pub(crate) fn apply_predictor_transform(
25    image_data: &mut [u8],
26    width: u16,
27    height: u16,
28    size_bits: u8,
29    predictor_data: &[u8],
30) -> Result<(), DecodingError> {
31    let block_xsize = usize::from(subsample_size(width, size_bits));
32    let width = usize::from(width);
33    let height = usize::from(height);
34
35    // Handle top and left borders specially. This involves ignoring mode and using specific
36    // predictors for each.
37    image_data[3] = image_data[3].wrapping_add(255);
38    apply_predictor_transform_1(image_data, 4..width * 4, width);
39    for y in 1..height {
40        for i in 0..4 {
41            image_data[y * width * 4 + i] =
42                image_data[y * width * 4 + i].wrapping_add(image_data[(y - 1) * width * 4 + i]);
43        }
44    }
45
46    for y in 1..height {
47        for block_x in 0..block_xsize {
48            let block_index = (y >> size_bits) * block_xsize + block_x;
49            let predictor = predictor_data[block_index * 4 + 1];
50            let start_index = (y * width + (block_x << size_bits).max(1)) * 4;
51            let end_index = (y * width + ((block_x + 1) << size_bits).min(width)) * 4;
52
53            match predictor {
54                0 => apply_predictor_transform_0(image_data, start_index..end_index, width),
55                1 => apply_predictor_transform_1(image_data, start_index..end_index, width),
56                2 => apply_predictor_transform_2(image_data, start_index..end_index, width),
57                3 => apply_predictor_transform_3(image_data, start_index..end_index, width),
58                4 => apply_predictor_transform_4(image_data, start_index..end_index, width),
59                5 => apply_predictor_transform_5(image_data, start_index..end_index, width),
60                6 => apply_predictor_transform_6(image_data, start_index..end_index, width),
61                7 => apply_predictor_transform_7(image_data, start_index..end_index, width),
62                8 => apply_predictor_transform_8(image_data, start_index..end_index, width),
63                9 => apply_predictor_transform_9(image_data, start_index..end_index, width),
64                10 => apply_predictor_transform_10(image_data, start_index..end_index, width),
65                11 => apply_predictor_transform_11(image_data, start_index..end_index, width),
66                12 => apply_predictor_transform_12(image_data, start_index..end_index, width),
67                13 => apply_predictor_transform_13(image_data, start_index..end_index, width),
68                _ => {}
69            }
70        }
71    }
72
73    Ok(())
74}
/// Predictor mode 0: adds `0xff` (wrapping) to the last byte of every 4-byte pixel in `range`.
///
/// Only every fourth byte, starting at `range.start + 3`, is modified. `_width` is unused.
///
/// # Panics
/// Panics if `range.end` exceeds `image_data.len()`.
pub fn apply_predictor_transform_0(image_data: &mut [u8], range: Range<usize>, _width: usize) {
    assert!(range.end <= image_data.len());
    for last_byte in (range.start + 3..range.end).step_by(4) {
        image_data[last_byte] = image_data[last_byte].wrapping_add(0xff);
    }
}
/// Predictor mode 1: adds to every byte in `range` the byte four positions earlier,
/// i.e. the same channel of the pixel to the left.
///
/// Bytes are processed in ascending order, so each pixel sees the already reconstructed
/// value of its left neighbour. `_width` is unused.
///
/// # Panics
/// Panics if `range.end` exceeds `image_data.len()`.
pub fn apply_predictor_transform_1(image_data: &mut [u8], range: Range<usize>, _width: usize) {
    assert!(range.end <= image_data.len());
    for idx in range {
        image_data[idx] = image_data[idx].wrapping_add(image_data[idx - 4]);
    }
}
/// Predictor mode 2: adds to every byte in `range` the byte `width * 4` positions earlier,
/// i.e. the same channel of the pixel directly above.
///
/// # Panics
/// Panics if `range.end` exceeds `image_data.len()`.
pub fn apply_predictor_transform_2(image_data: &mut [u8], range: Range<usize>, width: usize) {
    assert!(range.end <= image_data.len());
    let stride = width * 4;
    for idx in range {
        image_data[idx] = image_data[idx].wrapping_add(image_data[idx - stride]);
    }
}
/// Predictor mode 3: adds to every byte in `range` the byte at `idx - width * 4 + 4`,
/// i.e. the same channel of the pixel above and one to the right.
///
/// # Panics
/// Panics if `range.end` exceeds `image_data.len()`.
pub fn apply_predictor_transform_3(image_data: &mut [u8], range: Range<usize>, width: usize) {
    assert!(range.end <= image_data.len());
    let stride = width * 4;
    for idx in range {
        image_data[idx] = image_data[idx].wrapping_add(image_data[idx - stride + 4]);
    }
}
/// Predictor mode 4: adds to every byte in `range` the byte at `idx - width * 4 - 4`,
/// i.e. the same channel of the pixel above and one to the left.
///
/// # Panics
/// Panics if `range.end` exceeds `image_data.len()`.
pub fn apply_predictor_transform_4(image_data: &mut [u8], range: Range<usize>, width: usize) {
    assert!(range.end <= image_data.len());
    let stride = width * 4;
    for idx in range {
        image_data[idx] = image_data[idx].wrapping_add(image_data[idx - stride - 4]);
    }
}
115pub fn apply_predictor_transform_5(image_data: &mut [u8], range: Range<usize>, width: usize) {
116    let (old, current) = image_data[..range.end].split_at_mut(range.start);
117
118    let mut prev: [u8; 4] = old[range.start - 4..][..4].try_into().unwrap();
119    let top_right = &old[range.start - width * 4 + 4..];
120    let top = &old[range.start - width * 4..];
121
122    for ((chunk, tr), t) in current
123        .chunks_exact_mut(4)
124        .zip(top_right.chunks_exact(4))
125        .zip(top.chunks_exact(4))
126    {
127        prev = [
128            chunk[0].wrapping_add(average2_autovec(average2_autovec(prev[0], tr[0]), t[0])),
129            chunk[1].wrapping_add(average2_autovec(average2_autovec(prev[1], tr[1]), t[1])),
130            chunk[2].wrapping_add(average2_autovec(average2_autovec(prev[2], tr[2]), t[2])),
131            chunk[3].wrapping_add(average2_autovec(average2_autovec(prev[3], tr[3]), t[3])),
132        ];
133        chunk.copy_from_slice(&prev);
134    }
135}
136pub fn apply_predictor_transform_6(image_data: &mut [u8], range: Range<usize>, width: usize) {
137    assert!(range.end <= image_data.len());
138    let mut i = range.start;
139    while i < range.end {
140        image_data[i] =
141            image_data[i].wrapping_add(average2(image_data[i - 4], image_data[i - width * 4 - 4]));
142        i += 1;
143    }
144}
145pub fn apply_predictor_transform_7(image_data: &mut [u8], range: Range<usize>, width: usize) {
146    let (old, current) = image_data[..range.end].split_at_mut(range.start);
147
148    let mut prev: [u8; 4] = old[range.start - 4..][..4].try_into().unwrap();
149    let top = &old[range.start - width * 4..][..(range.end - range.start)];
150
151    let mut current_chunks = current.chunks_exact_mut(64);
152    let mut top_chunks = top.chunks_exact(64);
153
154    for (current, top) in (&mut current_chunks).zip(&mut top_chunks) {
155        for (chunk, t) in current.chunks_exact_mut(4).zip(top.chunks_exact(4)) {
156            prev = [
157                chunk[0].wrapping_add(average2_autovec(prev[0], t[0])),
158                chunk[1].wrapping_add(average2_autovec(prev[1], t[1])),
159                chunk[2].wrapping_add(average2_autovec(prev[2], t[2])),
160                chunk[3].wrapping_add(average2_autovec(prev[3], t[3])),
161            ];
162            chunk.copy_from_slice(&prev);
163        }
164    }
165    for (chunk, t) in current_chunks
166        .into_remainder()
167        .chunks_exact_mut(4)
168        .zip(top_chunks.remainder().chunks_exact(4))
169    {
170        prev = [
171            chunk[0].wrapping_add(average2_autovec(prev[0], t[0])),
172            chunk[1].wrapping_add(average2_autovec(prev[1], t[1])),
173            chunk[2].wrapping_add(average2_autovec(prev[2], t[2])),
174            chunk[3].wrapping_add(average2_autovec(prev[3], t[3])),
175        ];
176        chunk.copy_from_slice(&prev);
177    }
178}
179pub fn apply_predictor_transform_8(image_data: &mut [u8], range: Range<usize>, width: usize) {
180    assert!(range.end <= image_data.len());
181    let mut i = range.start;
182    while i < range.end {
183        image_data[i] = image_data[i].wrapping_add(average2(
184            image_data[i - width * 4 - 4],
185            image_data[i - width * 4],
186        ));
187        i += 1;
188    }
189}
190pub fn apply_predictor_transform_9(image_data: &mut [u8], range: Range<usize>, width: usize) {
191    assert!(range.end <= image_data.len());
192    let mut i = range.start;
193    while i < range.end {
194        image_data[i] = image_data[i].wrapping_add(average2(
195            image_data[i - width * 4],
196            image_data[i - width * 4 + 4],
197        ));
198        i += 1;
199    }
200}
201pub fn apply_predictor_transform_10(image_data: &mut [u8], range: Range<usize>, width: usize) {
202    let (old, current) = image_data[..range.end].split_at_mut(range.start);
203    let mut prev: [u8; 4] = old[range.start - 4..][..4].try_into().unwrap();
204
205    let top_left = &old[range.start - width * 4 - 4..];
206    let top = &old[range.start - width * 4..];
207    let top_right = &old[range.start - width * 4 + 4..];
208
209    for (((chunk, tl), t), tr) in current
210        .chunks_exact_mut(4)
211        .zip(top_left.chunks_exact(4))
212        .zip(top.chunks_exact(4))
213        .zip(top_right.chunks_exact(4))
214    {
215        prev = [
216            chunk[0].wrapping_add(average2(average2(prev[0], tl[0]), average2(t[0], tr[0]))),
217            chunk[1].wrapping_add(average2(average2(prev[1], tl[1]), average2(t[1], tr[1]))),
218            chunk[2].wrapping_add(average2(average2(prev[2], tl[2]), average2(t[2], tr[2]))),
219            chunk[3].wrapping_add(average2(average2(prev[3], tl[3]), average2(t[3], tr[3]))),
220        ];
221        chunk.copy_from_slice(&prev);
222    }
223}
/// Predictor mode 11: for each pixel in `range`, adds either the left or the top pixel,
/// whichever is closer to the per-channel estimate `left + top - top_left`.
///
/// Closeness is the sum over the four channels of the absolute difference to the estimate.
/// The left pixel is used only when its distance is strictly smaller; ties go to the top pixel.
///
/// # Panics
/// Panics if `range` does not fit in `image_data` or the neighbouring pixels lie before the
/// start of the buffer.
pub fn apply_predictor_transform_11(image_data: &mut [u8], range: Range<usize>, width: usize) {
    /// Widens the first four bytes of `px` to `i16` so differences cannot overflow.
    fn widen(px: &[u8]) -> [i16; 4] {
        [
            i16::from(px[0]),
            i16::from(px[1]),
            i16::from(px[2]),
            i16::from(px[3]),
        ]
    }

    let (decoded, pending) = image_data[..range.end].split_at_mut(range.start);
    let above = &decoded[range.start - width * 4..];

    let mut left = widen(&decoded[range.start - 4..]);
    let mut top_left = widen(&decoded[range.start - width * 4 - 4..]);

    for (pixel, above_px) in pending.chunks_exact_mut(4).zip(above.chunks_exact(4)) {
        let top = widen(above_px);

        let mut distance_left = 0;
        let mut distance_top = 0;
        for c in 0..4 {
            let estimate = left[c] + top[c] - top_left[c];
            distance_left += (estimate - left[c]).abs();
            distance_top += (estimate - top[c]).abs();
        }

        let chosen = if distance_left < distance_top { left } else { top };
        for c in 0..4 {
            pixel[c] = pixel[c].wrapping_add(chosen[c] as u8);
        }

        // The pixel above becomes the next top-left; the pixel just written the next left.
        top_left = top;
        left = widen(pixel);
    }
}
282pub fn apply_predictor_transform_12(image_data: &mut [u8], range: Range<usize>, width: usize) {
283    let (old, current) = image_data[..range.end].split_at_mut(range.start);
284    let mut prev: [u8; 4] = old[range.start - 4..][..4].try_into().unwrap();
285
286    let top_left = &old[range.start - width * 4 - 4..];
287    let top = &old[range.start - width * 4..];
288
289    for ((chunk, tl), t) in current
290        .chunks_exact_mut(4)
291        .zip(top_left.chunks_exact(4))
292        .zip(top.chunks_exact(4))
293    {
294        prev = [
295            chunk[0].wrapping_add(clamp_add_subtract_full(
296                i16::from(prev[0]),
297                i16::from(t[0]),
298                i16::from(tl[0]),
299            )),
300            chunk[1].wrapping_add(clamp_add_subtract_full(
301                i16::from(prev[1]),
302                i16::from(t[1]),
303                i16::from(tl[1]),
304            )),
305            chunk[2].wrapping_add(clamp_add_subtract_full(
306                i16::from(prev[2]),
307                i16::from(t[2]),
308                i16::from(tl[2]),
309            )),
310            chunk[3].wrapping_add(clamp_add_subtract_full(
311                i16::from(prev[3]),
312                i16::from(t[3]),
313                i16::from(tl[3]),
314            )),
315        ];
316        chunk.copy_from_slice(&prev);
317    }
318}
319pub fn apply_predictor_transform_13(image_data: &mut [u8], range: Range<usize>, width: usize) {
320    let (old, current) = image_data[..range.end].split_at_mut(range.start);
321    let mut prev: [u8; 4] = old[range.start - 4..][..4].try_into().unwrap();
322
323    let top_left = &old[range.start - width * 4 - 4..][..(range.end - range.start)];
324    let top = &old[range.start - width * 4..][..(range.end - range.start)];
325
326    for ((chunk, tl), t) in current
327        .chunks_exact_mut(4)
328        .zip(top_left.chunks_exact(4))
329        .zip(top.chunks_exact(4))
330    {
331        prev = [
332            chunk[0].wrapping_add(clamp_add_subtract_half(
333                (i16::from(prev[0]) + i16::from(t[0])) / 2,
334                i16::from(tl[0]),
335            )),
336            chunk[1].wrapping_add(clamp_add_subtract_half(
337                (i16::from(prev[1]) + i16::from(t[1])) / 2,
338                i16::from(tl[1]),
339            )),
340            chunk[2].wrapping_add(clamp_add_subtract_half(
341                (i16::from(prev[2]) + i16::from(t[2])) / 2,
342                i16::from(tl[2]),
343            )),
344            chunk[3].wrapping_add(clamp_add_subtract_half(
345                (i16::from(prev[3]) + i16::from(t[3])) / 2,
346                i16::from(tl[3]),
347            )),
348        ];
349        chunk.copy_from_slice(&prev);
350    }
351}
352
353pub(crate) fn apply_color_transform(
354    image_data: &mut [u8],
355    width: u16,
356    size_bits: u8,
357    transform_data: &[u8],
358) {
359    let block_xsize = usize::from(subsample_size(width, size_bits));
360    let width = usize::from(width);
361
362    for (y, row) in image_data.chunks_exact_mut(width * 4).enumerate() {
363        let row_transform_data_start = (y >> size_bits) * block_xsize * 4;
364        // the length of block_tf_data should be `block_xsize * 4`, so we could slice it with [..block_xsize * 4]
365        // but there is no point - `.zip()` runs until either of the iterators is consumed,
366        // so the extra slicing operation would be doing more work for no reason
367        let row_tf_data = &transform_data[row_transform_data_start..];
368
369        for (block, transform) in row
370            .chunks_mut(4 << size_bits)
371            .zip(row_tf_data.chunks_exact(4))
372        {
373            let red_to_blue = transform[0];
374            let green_to_blue = transform[1];
375            let green_to_red = transform[2];
376
377            for pixel in block.chunks_exact_mut(4) {
378                let green = u32::from(pixel[1]);
379                let mut temp_red = u32::from(pixel[0]);
380                let mut temp_blue = u32::from(pixel[2]);
381
382                temp_red += color_transform_delta(green_to_red as i8, green as i8);
383                temp_blue += color_transform_delta(green_to_blue as i8, green as i8);
384                temp_blue += color_transform_delta(red_to_blue as i8, temp_red as i8);
385
386                pixel[0] = (temp_red & 0xff) as u8;
387                pixel[2] = (temp_blue & 0xff) as u8;
388            }
389        }
390    }
391}
392
/// Undoes the subtract-green transform in place: for every complete 4-byte pixel, byte 1 is
/// added (wrapping) to byte 0 and to byte 2.
///
/// Trailing bytes that do not form a complete pixel are left untouched.
pub(crate) fn apply_subtract_green_transform(image_data: &mut [u8]) {
    for pixel in image_data.chunks_exact_mut(4) {
        let green = pixel[1];
        pixel[0] = pixel[0].wrapping_add(green);
        pixel[2] = pixel[2].wrapping_add(green);
    }
}
399
400pub(crate) fn apply_color_indexing_transform(
401    image_data: &mut [u8],
402    width: u16,
403    height: u16,
404    table_size: u16,
405    table_data: &[u8],
406) {
407    assert!(table_size > 0);
408    if table_size > 16 {
409        // convert the table of colors into a Vec of color values that can be directly indexed
410        let mut table: Vec<[u8; 4]> = table_data
411            .chunks_exact(4)
412            // convince the compiler that each chunk is 4 bytes long, important for optimizations in the loop below
413            .map(|c| TryInto::<[u8; 4]>::try_into(c).unwrap())
414            .collect();
415        // pad the table to 256 values if it's smaller than that so we could index into it by u8 without bounds checks
416        // also required for correctness: WebP spec requires out-of-bounds indices to be treated as [0,0,0,0]
417        table.resize(256, [0; 4]);
418        // convince the compiler that the length of the table is 256 to avoid bounds checks in the loop below
419        let table: &[[u8; 4]; 256] = table.as_slice().try_into().unwrap();
420
421        for pixel in image_data.chunks_exact_mut(4) {
422            // Index is in G channel.
423            // WebP format encodes ARGB pixels, but we permute to RGBA immediately after reading from the bitstream.
424            pixel.copy_from_slice(&table[pixel[1] as usize]);
425        }
426    } else {
427        // table_size_u16 is 1 to 16
428        let table_size = table_size as u8;
429
430        // Dispatch to specialized implementation for each table size band for performance.
431        // Otherwise the compiler doesn't know the size of our copies
432        // and ends up calling out to memmove for every pixel even though a single load is sufficient.
433        if table_size <= 2 {
434            // Max 2 colors, 1 bit per pixel index -> W_BITS = 3
435            const W_BITS_VAL: u8 = 3;
436            // EXP_ENTRY_SIZE is 4 bytes/pixel * (1 << W_BITS_VAL) pixels/entry
437            const EXP_ENTRY_SIZE_VAL: usize = 4 * (1 << W_BITS_VAL); // 4 * 8 = 32
438            apply_color_indexing_transform_small_table::<W_BITS_VAL, EXP_ENTRY_SIZE_VAL>(
439                image_data, width, height, table_size, table_data,
440            );
441        } else if table_size <= 4 {
442            // Max 4 colors, 2 bits per pixel index -> W_BITS = 2
443            const W_BITS_VAL: u8 = 2;
444            const EXP_ENTRY_SIZE_VAL: usize = 4 * (1 << W_BITS_VAL); // 4 * 4 = 16
445            apply_color_indexing_transform_small_table::<W_BITS_VAL, EXP_ENTRY_SIZE_VAL>(
446                image_data, width, height, table_size, table_data,
447            );
448        } else {
449            // Max 16 colors (5 to 16), 4 bits per pixel index -> W_BITS = 1
450            // table_size_u16 must be <= 16 here
451            const W_BITS_VAL: u8 = 1;
452            const EXP_ENTRY_SIZE_VAL: usize = 4 * (1 << W_BITS_VAL); // 4 * 2 = 8
453            apply_color_indexing_transform_small_table::<W_BITS_VAL, EXP_ENTRY_SIZE_VAL>(
454                image_data, width, height, table_size, table_data,
455            );
456        }
457    }
458}
459
/// Expands a palette image whose indices are packed several to a byte, in place.
///
/// `W_BITS` is log2 of the number of pixels packed into one index byte and `EXP_ENTRY_SIZE`
/// must equal `4 << W_BITS`, the number of output bytes one index byte expands to.
///
/// On entry the start of `image_data` holds the packed image: rows of
/// `ceil(width / pixels_per_byte)` 4-byte pixels with the index byte in position 1. On exit
/// it holds `width * height` expanded 4-byte pixels. Indices `>= table_size` expand to
/// `[0, 0, 0, 0]`. Rows are processed last to first so that expanded output never overwrites
/// packed input that has not been read yet.
///
/// # Panics
/// Panics if `table_data` holds fewer than `table_size` colors or `image_data` is shorter than
/// the expanded image.
fn apply_color_indexing_transform_small_table<const W_BITS: u8, const EXP_ENTRY_SIZE: usize>(
    image_data: &mut [u8],
    width: u16,
    height: u16,
    table_size: u8, // Max 16
    table_data: &[u8],
) {
    // Not `const` items (not possible as of Rust 1.87), but constant per instantiation
    // because they only depend on the const generic W_BITS.
    let pixels_per_byte: u8 = 1 << W_BITS;
    let bits_per_index: u8 = 8 / pixels_per_byte;
    let index_mask: u8 = (1 << bits_per_index) - 1;
    let pixels_per_byte_usize = usize::from(pixels_per_byte);

    // Sanity check that the caller passed a consistent pair of const arguments.
    debug_assert_eq!(
        EXP_ENTRY_SIZE,
        4 * pixels_per_byte_usize,
        "Mismatch in EXP_ENTRY_SIZE"
    );

    // Lookup table: every possible index byte maps to the pixels it expands to.
    let mut lut_storage: Vec<[u8; EXP_ENTRY_SIZE]> = Vec::with_capacity(256);
    for packed in 0..=u8::MAX {
        let mut expanded = [0u8; EXP_ENTRY_SIZE];
        for slot in 0..pixels_per_byte_usize {
            // The first pixel lives in the least significant bits.
            let index = (packed >> ((slot as u8) * bits_per_index)) & index_mask;

            let color: [u8; 4] = if index < table_size {
                let src = usize::from(index) * 4;
                table_data[src..src + 4].try_into().unwrap()
            } else {
                [0u8; 4] // WebP spec: out-of-bounds indices are [0,0,0,0]
            };

            expanded[slot * 4..slot * 4 + 4].copy_from_slice(&color);
        }
        lut_storage.push(expanded);
    }
    let lut: &[[u8; EXP_ENTRY_SIZE]; 256] = lut_storage.as_slice().try_into().unwrap();

    let blocks_per_row = width.div_ceil(pixels_per_byte.into()) as usize;

    if width == 0 || height == 0 {
        return;
    }

    let expanded_stride = usize::from(width) * 4;
    let packed_stride = blocks_per_row * 4;
    // Every block but the last expands to a full entry; the last may be cut short by the
    // image width.
    let full_blocks = blocks_per_row.saturating_sub(1);
    let tail_len = expanded_stride - EXP_ENTRY_SIZE * full_blocks;

    // Scratch copy of one row's index bytes, taken before the row is overwritten.
    let mut row_indices = vec![0u8; blocks_per_row];

    for y in (0..usize::from(height)).rev() {
        let packed_row = &image_data[y * packed_stride..][..packed_stride];
        for (slot, packed_pixel) in row_indices.iter_mut().zip(packed_row.chunks_exact(4)) {
            *slot = packed_pixel[1];
        }

        let expanded_row = &mut image_data[y * expanded_stride..][..expanded_stride];
        let (body, tail) = expanded_row.split_at_mut(full_blocks * EXP_ENTRY_SIZE);

        for (dst, &packed) in body.chunks_exact_mut(EXP_ENTRY_SIZE).zip(&row_indices) {
            // Array-to-array assignment has a compile-time size and avoids a memmove call.
            let dst: &mut [u8; EXP_ENTRY_SIZE] = dst.try_into().unwrap();
            *dst = lut[usize::from(packed)];
        }

        if let Some(&packed) = row_indices.last() {
            tail.copy_from_slice(&lut[usize::from(packed)][..tail_len]);
        }
    }
}
576
577//predictor functions
578
/// Average of two bytes, rounded down; the sum is formed in `u16` so it cannot overflow.
fn average2(a: u8, b: u8) -> u8 {
    let sum = u16::from(a) + u16::from(b);
    (sum >> 1) as u8
}
583
/// Average of two bytes, rounded down, computed without leaving `u8`.
///
/// Staying within `u8` lanes allows some predictors to be autovectorized. LLVM can turn
/// `average2` into this form, but not in all cases.
fn average2_autovec(a: u8, b: u8) -> u8 {
    // Bits set in both inputs count in full; bits set in only one count half.
    let common = a & b;
    let differing = a ^ b;
    common + (differing >> 1)
}
591
/// Computes `a + b - c` and clamps the result to `0..=255`.
// Clippy suggests the clamp method, but it seems to optimize worse as of rustc 1.82.0 nightly.
#[allow(clippy::manual_clamp)]
fn clamp_add_subtract_full(a: i16, b: i16, c: i16) -> u8 {
    let unclamped = a + b - c;
    unclamped.max(0).min(255) as u8
}
598
/// Computes `a + (a - b) / 2` and clamps the result to `0..=255`.
///
/// The division is integer division on `i16`, so the half-difference truncates toward zero.
// Clippy suggests the clamp method, but it seems to optimize worse as of rustc 1.82.0 nightly.
#[allow(clippy::manual_clamp)]
fn clamp_add_subtract_half(a: i16, b: i16) -> u8 {
    let half_difference = (a - b) / 2;
    (a + half_difference).max(0).min(255) as u8
}
605
/// Multiplies two signed bytes, reinterprets the product as `u32` and shifts it right by 5.
///
/// The shift is a logical shift on the unsigned value, so a negative product yields a large
/// number with zeros in its top five bits.
fn color_transform_delta(t: i8, c: i8) -> u32 {
    let product = i32::from(t) * i32::from(c);
    (product as u32) >> 5
}
610
/// Micro-benchmarks for the transforms in this module.
///
/// Only compiled for test builds with the `_benchmarks` feature enabled.
#[cfg(all(test, feature = "_benchmarks"))]
mod benches {
    use rand::Rng;
    use test::{black_box, Bencher};

    /// Signature shared by all `apply_predictor_transform_N` functions.
    type Predictor = fn(&mut [u8], std::ops::Range<usize>, usize);

    /// Benchmarks `predictor` on the second of two random 256-pixel rows, skipping the first
    /// pixel of that row so that a left neighbour always exists.
    fn measure_predictor(b: &mut Bencher, predictor: Predictor) {
        let width = 256;
        let mut pixels = vec![0u8; width * 8];
        rand::thread_rng().fill(&mut pixels[..]);
        b.bytes = 4 * width as u64 - 4;
        b.iter(|| {
            predictor(
                black_box(&mut pixels),
                black_box(width * 4 + 4..width * 8),
                black_box(width),
            )
        });
    }

    /// Generates one `#[bench]` function per `name => predictor` pair.
    macro_rules! predictor_benches {
        ($($name:ident => $predictor:ident),* $(,)?) => {
            $(
                #[bench]
                fn $name(b: &mut Bencher) {
                    measure_predictor(b, super::$predictor);
                }
            )*
        };
    }

    predictor_benches! {
        predictor00 => apply_predictor_transform_0,
        predictor01 => apply_predictor_transform_1,
        predictor02 => apply_predictor_transform_2,
        predictor03 => apply_predictor_transform_3,
        predictor04 => apply_predictor_transform_4,
        predictor05 => apply_predictor_transform_5,
        predictor06 => apply_predictor_transform_6,
        predictor07 => apply_predictor_transform_7,
        predictor08 => apply_predictor_transform_8,
        predictor09 => apply_predictor_transform_9,
        predictor10 => apply_predictor_transform_10,
        predictor11 => apply_predictor_transform_11,
        predictor12 => apply_predictor_transform_12,
        predictor13 => apply_predictor_transform_13,
    }

    /// Benchmarks the inverse color transform on a random 256x256 image with 8x8 blocks.
    #[bench]
    fn color_transform(b: &mut Bencher) {
        let width = 256;
        let height = 256;
        let size_bits = 3;
        let mut pixels = vec![0u8; width * height * 4];
        let mut coefficients = vec![0u8; (width * height * 4) >> (size_bits * 2)];
        rand::thread_rng().fill(&mut pixels[..]);
        rand::thread_rng().fill(&mut coefficients[..]);
        b.bytes = 4 * width as u64 * height as u64;
        b.iter(|| {
            super::apply_color_transform(
                black_box(&mut pixels),
                black_box(width as u16),
                black_box(size_bits),
                black_box(&coefficients),
            );
        });
    }

    /// Benchmarks the inverse subtract-green transform on 1024 random pixels.
    #[bench]
    fn subtract_green(b: &mut Bencher) {
        let mut pixels = vec![0u8; 1024 * 4];
        rand::thread_rng().fill(&mut pixels[..]);
        b.bytes = pixels.len() as u64;
        b.iter(|| {
            super::apply_subtract_green_transform(black_box(&mut pixels));
        });
    }
}