// zune_jpeg/worker.rs

/*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */

use alloc::format;

use core::cmp::min;
use core::convert::TryInto;

use zune_core::colorspace::ColorSpace;

use crate::color_convert::ycbcr_to_grayscale;
use crate::components::{Components, SampleRatios};
use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
use crate::errors::DecodeErrors;
/// fast 0..255 * 0..255 => 0..255 rounded multiplication
///
/// Borrowed from stb
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    // Jim Blinn's trick: for p = a*b + 128, ((p + (p >> 8)) >> 8) is an
    // exact rounded division by 255 for any product of two bytes.
    let product = i32::from(in_val) * i32::from(y) + 128;
    ((product + (product >> 8)) >> 8) as u8
}

/// Dispatch color conversion for decoded MCU rows.
///
/// `unprocessed` holds one sample plane per component, each row padded to
/// `padded_width` samples; converted pixels are written to `output` with
/// `width` real pixels per row. `color_convert_16` is the (possibly
/// SIMD-accelerated) YCbCr kernel selected by the decoder.
///
/// # Errors
/// Returns [`DecodeErrors::Format`] for colorspace pairs that have no
/// conversion routine.
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
pub(crate) fn color_convert(
    unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
    input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
    padded_width: usize
) -> Result<(), DecodeErrors> {
    if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace {
        // short-circuit identity conversions (e.g. RGB to RGB): just strip row padding
        copy_removing_padding(unprocessed, width, padded_width, output);
        return Ok(());
    }
    if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace {
        // identity conversion for 4-component colorspaces (e.g. CMYK to CMYK)
        copy_removing_padding_4x(unprocessed, width, padded_width, output);
        return Ok(());
    }
    // color convert
    match (input_colorspace, output_colorspace) {
        (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => {
            // only the luma plane is needed for grayscale output
            ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
        }
        (
            ColorSpace::YCbCr,
            ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA
        ) => {
            color_convert_ycbcr(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::YCCK, ColorSpace::RGB) => {
            color_convert_ycck_to_rgb::<3>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }

        (ColorSpace::YCCK, ColorSpace::RGBA) => {
            color_convert_ycck_to_rgb::<4>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::CMYK, ColorSpace::RGB) => {
            color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::CMYK, ColorSpace::RGBA) => {
            color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::MultiBand(n), _) => {
            // only 2-band multiband images have been seen in the wild so far
            if n.get() != 2 {
                return Err(DecodeErrors::Format(format!(
                    "Unknown multiband sample ({n}), please share sample"
                )));
            }
            copy_removing_padding_generic(
                unprocessed,
                width,
                padded_width,
                output,
                n.get() as usize
            );
        }
        (ColorSpace::Luma, ColorSpace::RGB) => {
            // duplicate the luma channel  three times to form RGB
            // Note, this may assume the direct conversion
            // from luma to RGB is by duplicating
            //
            // There may be a bit more complex ways
            // of doing it but won't get onto it
            convert_luma_to_rgb(unprocessed, width, padded_width, output)
        }
        (ColorSpace::Luma, ColorSpace::RGBA) => {
            // duplicate the luma channel  three times to form RGB
            // add 255 as alpha
            // Note, this may assume the direct conversion
            // from luma to RGB is by duplicating
            //
            // There may be a bit more complex ways
            // of doing it but won't get onto it
            convert_luma_to_rgba(unprocessed, width, padded_width, output)
        }

        // For the other components we do nothing(currently)
        _ => {
            let msg = format!(
                "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}");

            return Err(DecodeErrors::Format(msg));
        }
    }
    Ok(())
}

135fn convert_luma_to_rgb(
136    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
137) {
138    for (pix_w, y_w) in output
139        .chunks_exact_mut(width * 3)
140        .zip(mcu_block[0].chunks_exact(padded_width))
141    {
142        for (pix, c) in pix_w.chunks_exact_mut(3).zip(y_w) {
143            pix[0] = *c as u8;
144            pix[1] = *c as u8;
145            pix[2] = *c as u8;
146        }
147    }
148}
149fn convert_luma_to_rgba(
150    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
151) {
152    for (pix_w, y_w) in output
153        .chunks_exact_mut(width * 4)
154        .zip(mcu_block[0].chunks_exact(padded_width))
155    {
156        for (pix, c) in pix_w.chunks_exact_mut(4).zip(y_w) {
157            pix[0] = *c as u8;
158            pix[1] = *c as u8;
159            pix[2] = *c as u8;
160            pix[3] = 255;
161        }
162    }
163}
164/// Copy a block to output removing padding bytes from input
165/// if necessary
166#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
167fn copy_removing_padding(
168    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
169) {
170    for (((pix_w, c_w), m_w), y_w) in output
171        .chunks_exact_mut(width * 3)
172        .zip(mcu_block[0].chunks_exact(padded_width))
173        .zip(mcu_block[1].chunks_exact(padded_width))
174        .zip(mcu_block[2].chunks_exact(padded_width))
175    {
176        for (((pix, c), y), m) in pix_w.chunks_exact_mut(3).zip(c_w).zip(m_w).zip(y_w) {
177            pix[0] = *c as u8;
178            pix[1] = *y as u8;
179            pix[2] = *m as u8;
180        }
181    }
182}
183#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
184fn copy_removing_padding_4x(
185    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
186) {
187    for ((((pix_w, c_w), m_w), y_w), k_w) in output
188        .chunks_exact_mut(width * 4)
189        .zip(mcu_block[0].chunks_exact(padded_width))
190        .zip(mcu_block[1].chunks_exact(padded_width))
191        .zip(mcu_block[2].chunks_exact(padded_width))
192        .zip(mcu_block[3].chunks_exact(padded_width))
193    {
194        for ((((pix, c), y), m), k) in pix_w
195            .chunks_exact_mut(4)
196            .zip(c_w)
197            .zip(m_w)
198            .zip(y_w)
199            .zip(k_w)
200        {
201            pix[0] = *c as u8;
202            pix[1] = *y as u8;
203            pix[2] = *m as u8;
204            pix[3] = *k as u8;
205        }
206    }
207}
208#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
209fn copy_removing_padding_generic(
210    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8],
211    channels: usize
212) {
213    match channels {
214        // just do 2 for now
215        2 => {
216            for ((pix_w, y_w), k_w) in output
217                .chunks_exact_mut(width * channels)
218                .zip(mcu_block[0].chunks_exact(padded_width))
219                .zip(mcu_block[1].chunks_exact(padded_width))
220            {
221                for ((pix, c), k) in pix_w.chunks_exact_mut(2).zip(y_w).zip(k_w) {
222                    pix[0] = *c as u8;
223                    pix[1] = *k as u8;
224                }
225            }
226        }
227        _ => unreachable!()
228    }
229}
230/// Convert YCCK image to rgb
231#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
232fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
233    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
234    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
235) {
236    color_convert_ycbcr(
237        mcu_block,
238        width,
239        padded_width,
240        output_colorspace,
241        color_convert_16,
242        output
243    );
244    for (pix_w, m_w) in output
245        .chunks_exact_mut(width * 3)
246        .zip(mcu_block[3].chunks_exact(padded_width))
247    {
248        for (pix, m) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) {
249            let m = (*m) as u8;
250            pix[0] = blinn_8x8(255 - pix[0], m);
251            pix[1] = blinn_8x8(255 - pix[1], m);
252            pix[2] = blinn_8x8(255 - pix[2], m);
253        }
254    }
255}
256
257#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
258fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
259    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
260) {
261    for ((((pix_w, c_w), m_w), y_w), k_w) in output
262        .chunks_exact_mut(width * NUM_COMPONENTS)
263        .zip(mcu_block[0].chunks_exact(padded_width))
264        .zip(mcu_block[1].chunks_exact(padded_width))
265        .zip(mcu_block[2].chunks_exact(padded_width))
266        .zip(mcu_block[3].chunks_exact(padded_width))
267    {
268        for ((((pix, c), m), y), k) in pix_w
269            .chunks_exact_mut(3)
270            .zip(c_w)
271            .zip(m_w)
272            .zip(y_w)
273            .zip(k_w)
274        {
275            let c = *c as u8;
276            let m = *m as u8;
277            let y = *y as u8;
278            let k = *k as u8;
279
280            pix[0] = blinn_8x8(c, k);
281            pix[1] = blinn_8x8(m, k);
282            pix[2] = blinn_8x8(y, k);
283        }
284    }
285}
286
/// Do color-conversion for interleaved MCU
///
/// Converts the Y, Cb and Cr planes in `mcu_block[0..3]` row by row into
/// `output` using the 16-pixel-wide kernel `color_convert_16`, discarding
/// the encoder's row padding (`padded_width` vs `width`).
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    // bytes per output row
    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than  16.
    // (64 = 16 pixels * max 4 output components)
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer
            // Use a min with 16 to prevent some panics, see https://github.com/etemesi254/zune-image/issues/331
            y_out[0..min(y_width.len(), 16)].copy_from_slice(&y_width[0..min(y_width.len(), 16)]);
            cb_out[0..min(cb_width.len(), 16)]
                .copy_from_slice(&cb_width[0..min(cb_width.len(), 16)]);
            cr_out[0..min(cr_width.len(), 16)]
                .copy_from_slice(&cr_width[0..min(cr_width.len(), 16)]);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride: only the first `width` pixels of the 16 converted are real
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }

        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        // (width >= 16 is guaranteed here, so `width - 16` cannot underflow)
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        // splice the re-converted tail over the last 16 pixels of the row
        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Up-sample one MCU row of a component's decoded coefficients.
///
/// `i` is the index of the current MCU row and `mcu_height` the total number
/// of MCU rows in the image; `upsampler_scratch_space` is caller-provided
/// scratch handed to the component's up-sampling function pointer.
/// `has_vertical_sample` marks the mixed-sampling case handled in the
/// `SampleRatios::H` branch below.
///
/// # Errors
/// Returns [`DecodeErrors::FormatStatic`] when the component's dimensions
/// are inconsistent and the row slicing below would panic.
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) -> Result<(), DecodeErrors> {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)

            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last row of the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */

            let mut dest_start = 0;
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now

                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                // first decoded row of the new MCU acts as the row below
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            let stride = component.width_stride * component.vertical_sample;
            let stop_offset = component.raw_coeff.len() / component.width_stride;

            // NOTE(review): this check only passes when
            // `stop_offset * stride == raw_coeff.len()`, which for a
            // width-stride-divisible buffer implies vertical_sample == 1 —
            // presumably true for components reaching this branch; verify.
            if component.raw_coeff.len() != stop_offset * stride {
                // slice would panic below
                return Err(DecodeErrors::FormatStatic(
                    "Invalid component dimensions, would panic"
                ));
            }
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters

                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    // carve out the destination span for this up-sampled row
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            //assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());
            // Before it was an assert, but numerous and numerous and numerous
            // bug fixes and ad hoc solutions later, I have now just decided  to keep it as a resize
            component
                .upsample_dest
                .resize(component.raw_coeff.len() * 2, 0);

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y    HS:2 VS:2 QT:0
                Component ID:Cb   HS:1 VS:1 QT:1
                Component ID:Cr   HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                // save the trailing `length` samples of the previous up-sampled data
                let length = component.first_row_upsample_dest.len();
                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }
            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        SampleRatios::Generic(h, v) => {
            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            //let size =  component.width_stride.div_ceil(v);

            // for (single_row, output_stride) in raw_coeff
            //     .chunks_exact(size)
            //     .zip(dest_coeff.chunks_exact_mut(component.width_stride * h))
            // {
            //     (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            //
            // }
            // each input row expands to h*v output rows; the sampler fills one
            // h-wide row at a time, repeated v times vertically
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * h * v))
            {
                for row in output_stride.chunks_exact_mut(component.width_stride * h) {
                    (component.up_sampler)(single_row, &[], &[], &mut [], row);
                }
            }
        }
        SampleRatios::None => {}
    };
    Ok(())
}