// zune_jpeg/worker.rs

/*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */

use alloc::format;

use core::cmp::min;
use core::convert::TryInto;

use zune_core::colorspace::ColorSpace;

use crate::color_convert::ycbcr_to_grayscale;
use crate::components::{Components, SampleRatios};
use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
use crate::errors::DecodeErrors;
/// fast 0..255 * 0..255 => 0..255 rounded multiplication
///
/// Borrowed from stb
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    // Jim Blinn's trick: for p = a*b + 128, ((p + (p >> 8)) >> 8) is an
    // exact rounded division by 255 for any product of two bytes.
    let product = i32::from(in_val) * i32::from(y) + 128;
    ((product + (product >> 8)) >> 8) as u8
}

/// Dispatch color conversion for decoded MCU rows.
///
/// `unprocessed` holds one sample plane per component, each row padded to
/// `padded_width` samples; converted pixels are written to `output` with
/// `width` real pixels per row. `color_convert_16` is the (possibly
/// SIMD-accelerated) YCbCr kernel selected by the decoder.
///
/// # Errors
/// Returns [`DecodeErrors::Format`] for colorspace pairs that have no
/// conversion routine.
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
pub(crate) fn color_convert(
    unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
    input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
    padded_width: usize
) -> Result<(), DecodeErrors> {
    if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace {
        // short-circuit identity conversions (e.g. RGB to RGB): just strip row padding
        copy_removing_padding(unprocessed, width, padded_width, output);
        return Ok(());
    }
    if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace {
        // identity conversion for 4-component colorspaces (e.g. CMYK to CMYK)
        copy_removing_padding_4x(unprocessed, width, padded_width, output);
        return Ok(());
    }
    // color convert
    match (input_colorspace, output_colorspace) {
        (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => {
            // only the luma plane is needed for grayscale output
            ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
        }
        (
            ColorSpace::YCbCr,
            ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA
        ) => {
            color_convert_ycbcr(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::YCCK, ColorSpace::RGB) => {
            color_convert_ycck_to_rgb::<3>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }

        (ColorSpace::YCCK, ColorSpace::RGBA) => {
            color_convert_ycck_to_rgb::<4>(
                unprocessed,
                width,
                padded_width,
                output_colorspace,
                color_convert_16,
                output
            );
        }
        (ColorSpace::CMYK, ColorSpace::RGB) => {
            color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::CMYK, ColorSpace::RGBA) => {
            color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
        }
        (ColorSpace::MultiBand(n), _) => {
            // only 2-band multiband images have been seen in the wild so far
            if n.get() != 2 {
                return Err(DecodeErrors::Format(format!(
                    "Unknown multiband sample ({n}), please share sample"
                )));
            }
            copy_removing_padding_generic(
                unprocessed,
                width,
                padded_width,
                output,
                n.get() as usize
            );
        }
        (ColorSpace::Luma, ColorSpace::RGB) => {
            // duplicate the luma channel  three times to form RGB
            // Note, this may assume the direct conversion
            // from luma to RGB is by duplicating
            //
            // There may be a bit more complex ways
            // of doing it but won't get onto it
            convert_luma_to_rgb(unprocessed, width, padded_width, output)
        }
        (ColorSpace::Luma, ColorSpace::RGBA) => {
            // duplicate the luma channel  three times to form RGB
            // add 255 as alpha
            // Note, this may assume the direct conversion
            // from luma to RGB is by duplicating
            //
            // There may be a bit more complex ways
            // of doing it but won't get onto it
            convert_luma_to_rgba(unprocessed, width, padded_width, output)
        }

        // For the other components we do nothing(currently)
        _ => {
            let msg = format!(
                "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}");

            return Err(DecodeErrors::Format(msg));
        }
    }
    Ok(())
}

135fn convert_luma_to_rgb(
136    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
137) {
138    for (pix_w, y_w) in output
139        .chunks_exact_mut(width * 3)
140        .zip(mcu_block[0].chunks_exact(padded_width))
141    {
142        for (pix, c) in pix_w.chunks_exact_mut(3).zip(y_w) {
143            pix[0] = *c as u8;
144            pix[1] = *c as u8;
145            pix[2] = *c as u8;
146        }
147    }
148}
149fn convert_luma_to_rgba(
150    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
151) {
152    for (pix_w, y_w) in output
153        .chunks_exact_mut(width * 4)
154        .zip(mcu_block[0].chunks_exact(padded_width))
155    {
156        for (pix, c) in pix_w.chunks_exact_mut(4).zip(y_w) {
157            pix[0] = *c as u8;
158            pix[1] = *c as u8;
159            pix[2] = *c as u8;
160            pix[3] = 255;
161        }
162    }
163}
164/// Copy a block to output removing padding bytes from input
165/// if necessary
166#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
167fn copy_removing_padding(
168    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
169) {
170    for (((pix_w, c_w), m_w), y_w) in output
171        .chunks_exact_mut(width * 3)
172        .zip(mcu_block[0].chunks_exact(padded_width))
173        .zip(mcu_block[1].chunks_exact(padded_width))
174        .zip(mcu_block[2].chunks_exact(padded_width))
175    {
176        for (((pix, c), y), m) in pix_w.chunks_exact_mut(3).zip(c_w).zip(m_w).zip(y_w) {
177            pix[0] = *c as u8;
178            pix[1] = *y as u8;
179            pix[2] = *m as u8;
180        }
181    }
182}
183#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
184fn copy_removing_padding_4x(
185    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
186) {
187    for ((((pix_w, c_w), m_w), y_w), k_w) in output
188        .chunks_exact_mut(width * 4)
189        .zip(mcu_block[0].chunks_exact(padded_width))
190        .zip(mcu_block[1].chunks_exact(padded_width))
191        .zip(mcu_block[2].chunks_exact(padded_width))
192        .zip(mcu_block[3].chunks_exact(padded_width))
193    {
194        for ((((pix, c), y), m), k) in pix_w
195            .chunks_exact_mut(4)
196            .zip(c_w)
197            .zip(m_w)
198            .zip(y_w)
199            .zip(k_w)
200        {
201            pix[0] = *c as u8;
202            pix[1] = *y as u8;
203            pix[2] = *m as u8;
204            pix[3] = *k as u8;
205        }
206    }
207}
208#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
209fn copy_removing_padding_generic(
210    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8],
211    channels: usize
212) {
213    match channels {
214        // just do 2 for now
215        2 => {
216            for ((pix_w, y_w), k_w) in output
217                .chunks_exact_mut(width * channels)
218                .zip(mcu_block[0].chunks_exact(padded_width))
219                .zip(mcu_block[1].chunks_exact(padded_width))
220            {
221                for ((pix, c), k) in pix_w.chunks_exact_mut(2).zip(y_w).zip(k_w) {
222                    pix[0] = *c as u8;
223                    pix[1] = *k as u8;
224                }
225            }
226        }
227        _ => unreachable!()
228    }
229}
230/// Convert YCCK image to rgb
231#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
232fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
233    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
234    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
235) {
236    color_convert_ycbcr(
237        mcu_block,
238        width,
239        padded_width,
240        output_colorspace,
241        color_convert_16,
242        output
243    );
244    for (pix_w, m_w) in output
245        .chunks_exact_mut(width * 3)
246        .zip(mcu_block[3].chunks_exact(padded_width))
247    {
248        for (pix, m) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) {
249            let m = (*m) as u8;
250            pix[0] = blinn_8x8(255 - pix[0], m);
251            pix[1] = blinn_8x8(255 - pix[1], m);
252            pix[2] = blinn_8x8(255 - pix[2], m);
253        }
254    }
255}
256
257#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
258fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
259    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
260) {
261    for ((((pix_w, c_w), m_w), y_w), k_w) in output
262        .chunks_exact_mut(width * NUM_COMPONENTS)
263        .zip(mcu_block[0].chunks_exact(padded_width))
264        .zip(mcu_block[1].chunks_exact(padded_width))
265        .zip(mcu_block[2].chunks_exact(padded_width))
266        .zip(mcu_block[3].chunks_exact(padded_width))
267    {
268        for ((((pix, c), m), y), k) in pix_w
269            .chunks_exact_mut(3)
270            .zip(c_w)
271            .zip(m_w)
272            .zip(y_w)
273            .zip(k_w)
274        {
275            let c = *c as u8;
276            let m = *m as u8;
277            let y = *y as u8;
278            let k = *k as u8;
279
280            pix[0] = blinn_8x8(c, k);
281            pix[1] = blinn_8x8(m, k);
282            pix[2] = blinn_8x8(y, k);
283        }
284    }
285}
286
/// Do color-conversion for interleaved MCU
///
/// Converts the Y, Cb and Cr planes in `mcu_block[0..3]` row by row into
/// `output` using the 16-pixel-wide kernel `color_convert_16`, discarding
/// the encoder's row padding (`padded_width` vs `width`).
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    // bytes per output row
    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than  16.
    // (64 = 16 pixels * max 4 output components)
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer
            // Use a min with 16 to prevent some panics, see https://github.com/etemesi254/zune-image/issues/331
            y_out[0..min(y_width.len(), 16)].copy_from_slice(&y_width[0..min(y_width.len(), 16)]);
            cb_out[0..min(cb_width.len(), 16)]
                .copy_from_slice(&cb_width[0..min(cb_width.len(), 16)]);
            cr_out[0..min(cr_width.len(), 16)]
                .copy_from_slice(&cr_width[0..min(cr_width.len(), 16)]);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride: only the first `width` pixels of the 16 converted are real
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }

        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        // (width >= 16 is guaranteed here, so `width - 16` cannot underflow)
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        // splice the re-converted tail over the last 16 pixels of the row
        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Up-sample one MCU row of a component's decoded coefficients.
///
/// `i` is the index of the current MCU row and `mcu_height` the total number
/// of MCU rows in the image; `upsampler_scratch_space` is caller-provided
/// scratch handed to the component's up-sampling function pointer.
/// `has_vertical_sample` marks the mixed-sampling case handled in the
/// `SampleRatios::H` branch below.
///
/// # Errors
/// Returns [`DecodeErrors::FormatStatic`] when the component's dimensions
/// are inconsistent and the row slicing below would panic.
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) -> Result<(), DecodeErrors> {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)

            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last row of the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */

            let mut dest_start = 0;
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now

                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                // first decoded row of the new MCU acts as the row below
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            let stride = component.width_stride * component.vertical_sample;
            let stop_offset = component.raw_coeff.len() / component.width_stride;

            // NOTE(review): this check only passes when
            // `stop_offset * stride == raw_coeff.len()`, which for a
            // width-stride-divisible buffer implies vertical_sample == 1 —
            // presumably true for components reaching this branch; verify.
            if component.raw_coeff.len() != stop_offset * stride {
                // slice would panic below
                return Err(DecodeErrors::FormatStatic(
                    "Invalid component dimensions, would panic"
                ));
            }
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters

                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    // carve out the destination span for this up-sampled row
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            //assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());
            // Before it was an assert, but numerous and numerous and numerous
            // bug fixes and ad hoc solutions later, I have now just decided  to keep it as a resize
            component
                .upsample_dest
                .resize(component.raw_coeff.len() * 2, 0);

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y    HS:2 VS:2 QT:0
                Component ID:Cb   HS:1 VS:1 QT:1
                Component ID:Cr   HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                // save the trailing `length` samples of the previous up-sampled data
                let length = component.first_row_upsample_dest.len();
                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }
            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        SampleRatios::Generic(h, v) => {
            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            //let size =  component.width_stride.div_ceil(v);

            // for (single_row, output_stride) in raw_coeff
            //     .chunks_exact(size)
            //     .zip(dest_coeff.chunks_exact_mut(component.width_stride * h))
            // {
            //     (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            //
            // }
            // each input row expands to h*v output rows; the sampler fills one
            // h-wide row at a time, repeated v times vertically
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * h * v))
            {
                for row in output_stride.chunks_exact_mut(component.width_stride * h) {
                    (component.up_sampler)(single_row, &[], &[], &mut [], row);
                }
            }
        }
        SampleRatios::None => {}
    };
    Ok(())
}