zune_jpeg/
mcu.rs

1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9use alloc::{format, vec};
10use core::cmp::min;
11use alloc::vec::Vec;
12use zune_core::bytestream::ZReaderTrait;
13use zune_core::colorspace::ColorSpace;
14use zune_core::colorspace::ColorSpace::Luma;
15use zune_core::log::{error, trace, warn};
16
17use crate::bitstream::BitStream;
18use crate::components::SampleRatios;
19use crate::decoder::MAX_COMPONENTS;
20use crate::errors::DecodeErrors;
21use crate::marker::Marker;
22use crate::mcu_prog::get_marker;
23use crate::misc::{calculate_padded_width, setup_component_params};
24use crate::worker::{color_convert, upsample};
25use crate::JpegDecoder;
26
/// Number of coefficients in a single DCT block.
///
/// This is the length of the scratch buffer each block is decoded into before the inverse
/// DCT is applied.
pub const DCT_BLOCK: usize = 64;
30
31impl<T: ZReaderTrait> JpegDecoder<T> {
32    /// Check for existence of DC and AC Huffman Tables
33    pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
34        // check that dc and AC tables exist outside the hot path
35        for component in &self.components {
36            let _ = &self
37                .dc_huffman_tables
38                .get(component.dc_huff_table)
39                .as_ref()
40                .ok_or_else(|| {
41                    DecodeErrors::HuffmanDecode(format!(
42                        "No Huffman DC table for component {:?} ",
43                        component.component_id
44                    ))
45                })?
46                .as_ref()
47                .ok_or_else(|| {
48                    DecodeErrors::HuffmanDecode(format!(
49                        "No DC table for component {:?}",
50                        component.component_id
51                    ))
52                })?;
53
54            let _ = &self
55                .ac_huffman_tables
56                .get(component.ac_huff_table)
57                .as_ref()
58                .ok_or_else(|| {
59                    DecodeErrors::HuffmanDecode(format!(
60                        "No Huffman AC table for component {:?} ",
61                        component.component_id
62                    ))
63                })?
64                .as_ref()
65                .ok_or_else(|| {
66                    DecodeErrors::HuffmanDecode(format!(
67                        "No AC table for component {:?}",
68                        component.component_id
69                    ))
70                })?;
71        }
72        Ok(())
73    }
74
75    /// Decode MCUs and carry out post processing.
76    ///
77    /// This is the main decoder loop for the library, the hot path.
78    ///
79    /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
80    /// here.
81    #[allow(
82        clippy::similar_names,
83        clippy::too_many_lines,
84        clippy::cast_possible_truncation
85    )]
86    #[inline(never)]
87    pub(crate) fn decode_mcu_ycbcr_baseline(
88        &mut self, pixels: &mut [u8]
89    ) -> Result<(), DecodeErrors> {
90        setup_component_params(self)?;
91
92        // check dc and AC tables
93        self.check_tables()?;
94
95        let (mut mcu_width, mut mcu_height);
96
97        if self.is_interleaved {
98            // set upsampling functions
99            self.set_upsampling()?;
100
101            mcu_width = self.mcu_x;
102            mcu_height = self.mcu_y;
103        } else {
104            // For non-interleaved images( (1*1) subsampling)
105            // number of MCU's are the widths (+7 to account for paddings) divided bu 8.
106            mcu_width = ((self.info.width + 7) / 8) as usize;
107            mcu_height = ((self.info.height + 7) / 8) as usize;
108        }
109        if self.is_interleaved
110            && self.input_colorspace.num_components() > 1
111            && self.options.jpeg_get_out_colorspace().num_components() == 1
112            && (self.sub_sample_ratio == SampleRatios::V
113                || self.sub_sample_ratio == SampleRatios::HV)
114        {
115            // For a specific set of images, e.g interleaved,
116            // when converting from YcbCr to grayscale, we need to
117            // take into account mcu height since the MCU decoding needs to take
118            // it into account for padding purposes and the post processor
119            // parses two rows per mcu width.
120            //
121            // set coeff to be 2 to ensure that we increment two rows
122            // for every mcu processed also
123            mcu_height *= self.v_max;
124            mcu_height /= self.h_max;
125            self.coeff = 2;
126        }
127
128        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
129            warn!("Grayscale image with down-sampled component, resetting component details");
130
131            self.reset_params();
132
133            mcu_width = ((self.info.width + 7) / 8) as usize;
134            mcu_height = ((self.info.height + 7) / 8) as usize;
135        }
136        let width = usize::from(self.info.width);
137
138        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);
139
140        let mut stream = BitStream::new();
141        let mut tmp = [0_i32; DCT_BLOCK];
142
143        let comp_len = self.components.len();
144
145        for (pos, comp) in self.components.iter_mut().enumerate() {
146            // Allocate only needed components.
147            //
148            // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
149            // components.
150            if min(
151                self.options.jpeg_get_out_colorspace().num_components() - 1,
152                pos
153            ) == pos
154                || comp_len == 4
155            // Special colorspace
156            {
157                // allocate enough space to hold a whole MCU width
158                // this means we should take into account sampling ratios
159                // `*8` is because each MCU spans 8 widths.
160                let len = comp.width_stride * comp.vertical_sample * 8;
161
162                comp.needed = true;
163                comp.raw_coeff = vec![0; len];
164            } else {
165                comp.needed = false;
166            }
167        }
168
169        // If all components are contained in the first scan of MCUs, then we can process into
170        // (upsampled) pixels immediately after each MCU, for convenience we use each row of MCUS.
171        // Otherwise, we must first wait until following SOS provide the remaining components.
172        let all_components_in_first_scan = usize::from(self.num_scans) == self.components.len();
173        let mut progressive_mcus: [Vec<i16>; 4] = core::array::from_fn(|_| vec![]);
174
175        if !all_components_in_first_scan {
176            for (component, mcu) in self.components.iter().zip(&mut progressive_mcus) {
177                let len = mcu_width
178                    * component.vertical_sample
179                    * component.horizontal_sample
180                    * mcu_height
181                    * 64;
182                *mcu = vec![0; len];
183            }
184        }
185
186        let mut pixels_written = 0;
187
188        let is_hv = usize::from(self.is_interleaved);
189        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
190        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
191
192        'sos: loop {
193            trace!(
194                "Baseline decoding of components: {:?}",
195                &self.z_order[..usize::from(self.num_scans)]
196            );
197            trace!("Decoding MCU width: {mcu_width}, height: {mcu_height}");
198
199            for i in 0..mcu_height {
200                // Report if we have no more bytes
201                // This may generate false negatives since we over-read bytes
202                // hence that why 37 is chosen(we assume if we over-read more than 37 bytes, we have a problem)
203                if stream.overread_by > 37
204                // favourite number :)
205                {
206                    if self.options.get_strict_mode() {
207                        return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
208                    };
209
210                    error!("Premature end of buffer");
211                    break;
212                }
213
214                // decode a whole MCU width,
215                // this takes into account interleaved components.
216                let terminate = if all_components_in_first_scan {
217                    self.decode_mcu_width::<false>(
218                        mcu_width,
219                        i,
220                        &mut tmp,
221                        &mut stream,
222                        &mut progressive_mcus,
223                    )?
224                } else {
225                    self.decode_mcu_width::<true>(
226                        mcu_width,
227                        i,
228                        &mut tmp,
229                        &mut stream,
230                        &mut progressive_mcus,
231                    )?
232                };
233
234                // process that width up until it's impossible. This is faster than allocation the
235                // full components, which we skipped earlier.
236                if all_components_in_first_scan {
237                    self.post_process(
238                        pixels,
239                        i,
240                        mcu_height,
241                        width,
242                        padded_width,
243                        &mut pixels_written,
244                        &mut upsampler_scratch_space,
245                    )?;
246                }
247
248                match terminate {
249                    McuContinuation::Ok => {}
250                    McuContinuation::AnotherSos if all_components_in_first_scan => {
251                        warn!("More than one SOS despite already having all components");
252                        return Ok(());
253                    }
254                    McuContinuation::AnotherSos => continue 'sos,
255                    McuContinuation::Terminate => {
256                        warn!("Got terminate signal, will not process further");
257                        return Ok(());
258                    }
259                }
260            }
261
262            // Breaks if we get here, looping only if we have restarted, i.e. found another SOS and
263            // continued at `'sos'.
264            break;
265        }
266
267        if !all_components_in_first_scan {
268            self.finish_baseline_decoding(&progressive_mcus, mcu_width, pixels)?;
269        }
270
271        // it may happen that some images don't have the whole buffer
272        // so we can't panic in case of that
273        // assert_eq!(pixels_written, pixels.len());
274
275        // For UHD usecases that tie two images separating them with EOI and
276        // SOI markers, it may happen that we do not reach this image end of image
277        // So this ensures we reach it
278        // Ensure we read EOI
279        if !stream.seen_eoi {
280            let marker = get_marker(&mut self.stream, &mut stream);
281            match marker {
282                Ok(_m) => {
283                    trace!("Found marker {:?}", _m);
284                }
285                Err(_) => {
286                    // ignore error
287                }
288            }
289        }
290
291        trace!("Finished decoding image");
292
293        Ok(())
294    }
295
296    /// Process all MCUs when baseline decoding has been processing them component-after-component.
297    /// For simplicity this assembles the dequantized blocks in the order that the post processing
298    /// of an interleaved baseline decoding would use.
299    #[allow(clippy::too_many_lines)]
300    #[allow(clippy::cast_sign_loss)]
301    pub(crate) fn finish_baseline_decoding(
302        &mut self, block: &[Vec<i16>; MAX_COMPONENTS], _mcu_width: usize, pixels: &mut [u8],
303    ) -> Result<(), DecodeErrors> {
304        let mcu_height = self.mcu_y;
305
306        // Size of our output image(width*height)
307        let is_hv = usize::from(self.is_interleaved);
308        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
309        let width = usize::from(self.info.width);
310        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);
311
312        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
313
314        for (pos, comp) in self.components.iter_mut().enumerate() {
315            // Mark only needed components for computing output colors.
316            if min(
317                self.options.jpeg_get_out_colorspace().num_components() - 1,
318                pos,
319            ) == pos
320                || self.input_colorspace == ColorSpace::YCCK
321                || self.input_colorspace == ColorSpace::CMYK
322            {
323                comp.needed = true;
324            } else {
325                comp.needed = false;
326            }
327        }
328
329        let mut pixels_written = 0;
330
331        // dequantize and idct have been performed, only color convert.
332        for i in 0..mcu_height {
333            // All the data is already in the right order, we just need to be able to pass it to
334            // the post_process & upsample method. That expects all the data to be stored as one
335            // row of MCUs in each component's `raw_coeff`.
336            'component: for (position, component) in &mut self.components.iter_mut().enumerate() {
337                if !component.needed {
338                    continue 'component;
339                }
340
341                // step is the number of pixels this iteration wil be handling
342                // Given by the number of mcu's height and the length of the component block
343                // Since the component block contains the whole channel as raw pixels
344                // we this evenly divides the pixels into MCU blocks
345                //
346                // For interleaved images, this gives us the exact pixels comprising a whole MCU
347                // block
348                let step = block[position].len() / mcu_height;
349
350                // where we will be reading our pixels from.
351                let slice = &block[position][i * step..][..step];
352                let temp_channel = &mut component.raw_coeff;
353                temp_channel[..step].copy_from_slice(slice);
354            }
355
356            // process that whole stripe of MCUs
357            self.post_process(
358                pixels,
359                i,
360                mcu_height,
361                width,
362                padded_width,
363                &mut pixels_written,
364                &mut upsampler_scratch_space,
365            )?;
366        }
367
368        return Ok(());
369    }
370
371    fn decode_mcu_width<const PROGRESSIVE: bool>(
372        &mut self, mcu_width: usize, mcu_height: usize, tmp: &mut [i32; 64],
373        stream: &mut BitStream, progressive: &mut [Vec<i16>; 4],
374    ) -> Result<McuContinuation, DecodeErrors> {
375        let z_order = self.z_order;
376
377        for j in 0..mcu_width {
378            // iterate over components
379            for &k in &z_order[..usize::from(self.num_scans)] {
380                let component = &mut self.components[k];
381
382                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
383                    .as_ref()
384                    .unwrap();
385
386                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
387                    .as_ref()
388                    .unwrap();
389
390                let qt_table = &component.quantization_table;
391                let channel = if PROGRESSIVE {
392                    let offset =
393                        mcu_height * component.width_stride * 8 * component.vertical_sample;
394                    &mut progressive[k][offset..]
395                } else {
396                    &mut component.raw_coeff
397                };
398
399                // If image is interleaved iterate over scan components,
400                // otherwise if it-s non-interleaved, these routines iterate in
401                // trivial scanline order(Y,Cb,Cr)
402                for v_samp in 0..component.vertical_sample {
403                    for h_samp in 0..component.horizontal_sample {
404                        // Fill the array with zeroes, decode_mcu_block expects
405                        // a zero based array.
406                        tmp.fill(0);
407
408                        stream.decode_mcu_block(
409                            &mut self.stream,
410                            dc_table,
411                            ac_table,
412                            qt_table,
413                            tmp,
414                            &mut component.dc_pred,
415                        )?;
416
417                        if component.needed {
418                            let idct_position = {
419                                // derived from stb and rewritten for my tastes
420                                let c2 = v_samp * 8;
421                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;
422
423                                component.width_stride * c2 + c3
424                            };
425
426                            let idct_pos = channel.get_mut(idct_position..).unwrap();
427                            //  call idct.
428                            (self.idct_func)(tmp, idct_pos, component.width_stride);
429                        }
430                    }
431                }
432            }
433
434            self.todo = self.todo.wrapping_sub(1);
435
436            if self.todo == 0 {
437                self.handle_rst_main(stream)?;
438                continue;
439            }
440
441            if stream.marker.is_some() && stream.bits_left == 0 {
442                break;
443            }
444        }
445
446        // After all interleaved components, that's an MCU
447        // handle stream markers
448        //
449        // In some corrupt images, it may occur that header markers occur in the stream.
450        // The spec EXPLICITLY FORBIDS this, specifically, in
451        // routine F.2.2.5  it says
452        // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
453        //
454        // But libjpeg-turbo allows it because of some weird reason. so I'll also
455        // allow it because of some weird reason.
456        if let Some(m) = stream.marker {
457            if m == Marker::EOI {
458                // acknowledge and ignore EOI marker.
459                stream.marker.take();
460                trace!("Found EOI marker");
461                // Google Introduced the Ultra-HD image format which is basically
462                // stitching two images into one container.
463                // They basically separate two images via a EOI and SOI marker
464                // so let's just ensure if we ever see EOI, we never read past that
465                // ever.
466                // https://github.com/google/libultrahdr
467                stream.seen_eoi = true;
468            } else if let Marker::RST(_) = m {
469                //debug_assert_eq!(self.todo, 0);
470                if self.todo == 0 {
471                    self.handle_rst(stream)?;
472                }
473            } else if let Marker::SOS = m {
474                self.parse_marker_inner(m)?;
475                stream.marker.take();
476                stream.reset();
477                trace!("Found SOS marker");
478                return Ok(McuContinuation::AnotherSos);
479            } else {
480                if self.options.get_strict_mode() {
481                    return Err(DecodeErrors::Format(format!(
482                        "Marker {m:?} found where not expected"
483                    )));
484                }
485                error!(
486                    "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
487                    m
488                );
489
490                self.parse_marker_inner(m)?;
491                stream.marker.take();
492                stream.reset();
493                return Ok(McuContinuation::Terminate);
494            }
495        }
496
497        Ok(McuContinuation::Ok)
498    }
499    // handle RST markers.
500    // No-op if not using restarts
501    // this routine is shared with mcu_prog
502    #[cold]
503    pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
504        self.todo = self.restart_interval;
505
506        if let Some(marker) = stream.marker {
507            // Found a marker
508            // Read stream and see what marker is stored there
509            match marker {
510                Marker::RST(_) => {
511                    // reset stream
512                    stream.reset();
513                    // Initialize dc predictions to zero for all components
514                    self.components.iter_mut().for_each(|x| x.dc_pred = 0);
515                    // Start iterating again. from position.
516                }
517                Marker::EOI => {
518                    // silent pass
519                }
520                _ => {
521                    return Err(DecodeErrors::MCUError(format!(
522                        "Marker {marker:?} found in bitstream, possibly corrupt jpeg"
523                    )));
524                }
525            }
526        }
527        Ok(())
528    }
529    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
530    pub(crate) fn post_process(
531        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
532        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
533    ) -> Result<(), DecodeErrors> {
534        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();
535
536        let mut px = *pixels_written;
537        // indicates whether image is vertically up-sampled
538        let is_vertically_sampled = self
539            .components
540            .iter()
541            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);
542
543        let mut comp_len = self.components.len();
544
545        // If we are moving from YCbCr -> Luma, we do not allocate storage for other components, so we
546        // will panic when we are trying to read samples, so for that case,
547        // hardcode it so that we  don't panic when doing
548        //   *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
549        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
550            comp_len = out_colorspace_components;
551        }
552        let mut color_conv_function =
553            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
554                for (pos, output) in pixels[px..]
555                    .chunks_exact_mut(width * out_colorspace_components)
556                    .take(num_iters)
557                    .enumerate()
558                {
559                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];
560
561                    // iterate over each line, since color-convert needs only
562                    // one line
563                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
564                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width];
565                    }
566                    color_convert(
567                        &raw_samples,
568                        self.color_convert_16,
569                        self.input_colorspace,
570                        self.options.jpeg_get_out_colorspace(),
571                        output,
572                        width,
573                        padded_width
574                    )?;
575                    px += width * out_colorspace_components;
576                }
577                Ok(())
578            };
579
580        let comps = &mut self.components[..];
581
582        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
583            {
584                // duplicated so that we can check that samples match
585                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
586                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
587
588                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
589                    *samp = if component.sample_ratio == SampleRatios::None {
590                        &component.raw_coeff
591                    } else {
592                        &component.upsample_dest
593                    };
594                }
595            }
596            for comp in comps.iter_mut() {
597                upsample(
598                    comp,
599                    mcu_height,
600                    i,
601                    upsampler_scratch_space,
602                    is_vertically_sampled
603                );
604            }
605
606            if is_vertically_sampled {
607                if i > 0 {
608                    // write the last line, it wasn't  up-sampled as we didn't have row_down
609                    // yet
610                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
611
612                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
613                        *samp = &component.first_row_upsample_dest;
614                    }
615
616                    // ensure length matches for all samples
617                    let _first_len = samples[0].len();
618
619                    // This was a good check, but can be caused to panic, esp on invalid/corrupt images.
620                    // See one in issue https://github.com/etemesi254/zune-image/issues/262, so for now
621                    // we just ignore and generate invalid images at the end.
622
623                    //
624                    //
625                    // for samp in samples.iter().take(comp_len) {
626                    //     assert_eq!(first_len, samp.len());
627                    // }
628                    let num_iters = self.coeff * self.v_max;
629
630                    color_conv_function(num_iters, samples)?;
631                }
632
633                // After up-sampling the last row, save  any row that can be used for
634                // a later up-sampling,
635                //
636                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
637                // the previous mcu, since we don't have the down row, so save it
638                for component in comps.iter_mut() {
639                    if component.sample_ratio != SampleRatios::H {
640                        // We don't care about H sampling factors, since it's copied in the workers function
641
642                        // copy last row to be used for the  next color conversion
643                        let size = component.vertical_sample
644                            * component.width_stride
645                            * component.sample_ratio.sample();
646
647                        let last_bytes =
648                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();
649
650                        component
651                            .first_row_upsample_dest
652                            .copy_from_slice(last_bytes);
653                    }
654                }
655            }
656
657            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
658
659            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
660                *samp = if component.sample_ratio == SampleRatios::None {
661                    &component.raw_coeff
662                } else {
663                    &component.upsample_dest
664                };
665            }
666
667            // we either do 7 or 8 MCU's depending on the state, this only applies to
668            // vertically sampled images
669            //
670            // for rows up until the last MCU, we do not upsample the last stride of the MCU
671            // which means that the number of iterations should take that into account is one less the
672            // up-sampled size
673            //
674            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
675            // should sample full raw coeffs
676            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));
677
678            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;
679
680            color_conv_function(num_iters, samples)?;
681        } else {
682            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];
683
684            self.components
685                .iter()
686                .enumerate()
687                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);
688
689            if let SampleRatios::Generic(_, v) = self.sub_sample_ratio {
690                color_conv_function(8 * v * self.coeff, channels_ref)?;
691            } else {
692                color_conv_function(8 * self.coeff, channels_ref)?;
693            }
694        }
695
696        *pixels_written = px;
697        Ok(())
698    }
699}
700
/// Outcome of decoding one row of MCUs, telling the main loop how to proceed.
enum McuContinuation {
    /// Nothing special happened, carry on with the next row of MCUs.
    Ok,
    /// A start-of-scan marker was found and its header parsed. The main loop restarts its
    /// row loop for the new scan, or stops with a warning if all components were already
    /// part of the first scan.
    AnotherSos,
    /// An unexpected marker was found in the entropy coded data (non-strict mode); the main
    /// loop stops decoding and returns what it has.
    Terminate,
}