Skip to main content

zune_jpeg/
mcu.rs

1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9use alloc::vec::Vec;
10use alloc::{format, vec};
11use core::cmp::min;
12
13use zune_core::bytestream::ZByteReaderTrait;
14use zune_core::colorspace::ColorSpace;
15use zune_core::colorspace::ColorSpace::Luma;
16use zune_core::log::{error, trace, warn};
17
18use crate::bitstream::BitStream;
19use crate::components::SampleRatios;
20use crate::decoder::MAX_COMPONENTS;
21use crate::errors::DecodeErrors;
22use crate::marker::Marker;
23use crate::mcu_prog::get_marker;
24use crate::misc::{calculate_padded_width, setup_component_params};
25use crate::worker::{color_convert, upsample};
26use crate::JpegDecoder;
27
/// The number of coefficients in a single DCT block (8 x 8 = 64).
29
30pub const DCT_BLOCK: usize = 64;
31
32impl<T: ZByteReaderTrait> JpegDecoder<T> {
33    /// Check for existence of DC and AC Huffman Tables
34    pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
35        // check that dc and AC tables exist outside the hot path
36        for component in &self.components {
37            let _ = &self
38                .dc_huffman_tables
39                .get(component.dc_huff_table)
40                .as_ref()
41                .ok_or_else(|| {
42                    DecodeErrors::HuffmanDecode(format!(
43                        "No Huffman DC table for component {:?} ",
44                        component.component_id
45                    ))
46                })?
47                .as_ref()
48                .ok_or_else(|| {
49                    DecodeErrors::HuffmanDecode(format!(
50                        "No DC table for component {:?}",
51                        component.component_id
52                    ))
53                })?;
54
55            let _ = &self
56                .ac_huffman_tables
57                .get(component.ac_huff_table)
58                .as_ref()
59                .ok_or_else(|| {
60                    DecodeErrors::HuffmanDecode(format!(
61                        "No Huffman AC table for component {:?} ",
62                        component.component_id
63                    ))
64                })?
65                .as_ref()
66                .ok_or_else(|| {
67                    DecodeErrors::HuffmanDecode(format!(
68                        "No AC table for component {:?}",
69                        component.component_id
70                    ))
71                })?;
72        }
73        Ok(())
74    }
75
76    /// Decode MCUs and carry out post processing.
77    ///
78    /// This is the main decoder loop for the library, the hot path.
79    ///
80    /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
81    /// here.
82    #[allow(
83        clippy::similar_names,
84        clippy::too_many_lines,
85        clippy::cast_possible_truncation
86    )]
87    #[inline(never)]
88    pub(crate) fn decode_mcu_ycbcr_baseline(
89        &mut self, pixels: &mut [u8]
90    ) -> Result<(), DecodeErrors> {
91        setup_component_params(self)?;
92
93        // check dc and AC tables
94        self.check_tables()?;
95
96        let (mut mcu_width, mut mcu_height);
97
98        if self.is_interleaved {
99            // set upsampling functions
100            self.set_upsampling()?;
101
102            mcu_width = self.mcu_x;
103            mcu_height = self.mcu_y;
104        } else {
105            // For non-interleaved images( (1*1) subsampling)
106            // number of MCU's are the widths (+7 to account for paddings) divided bu 8.
107            mcu_width = (self.info.width as usize + 7) / 8;
108            mcu_height = (self.info.height as usize + 7) / 8;
109        }
110        if self.is_interleaved
111            && self.input_colorspace.num_components() > 1
112            && self.options.jpeg_get_out_colorspace().num_components() == 1
113            && (self.info.sample_ratio == SampleRatios::V
114                || self.info.sample_ratio == SampleRatios::HV)
115        {
116            // For a specific set of images, e.g interleaved,
117            // when converting from YcbCr to grayscale, we need to
118            // take into account mcu height since the MCU decoding needs to take
119            // it into account for padding purposes and the post processor
120            // parses two rows per mcu width.
121            //
122            // set coeff to be 2 to ensure that we increment two rows
123            // for every mcu processed also
124            mcu_height *= self.v_max;
125            mcu_height /= self.h_max;
126            self.coeff = 2;
127        }
128
129        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
130            warn!("Grayscale image with down-sampled component, resetting component details");
131
132            self.reset_params();
133
134            mcu_width = ((self.info.width + 7) / 8) as usize;
135            mcu_height = ((self.info.height + 7) / 8) as usize;
136        }
137        let width = usize::from(self.info.width);
138
139        let padded_width = calculate_padded_width(width, self.info.sample_ratio);
140
141        let mut stream = BitStream::new();
142        let mut tmp = [0_i32; DCT_BLOCK];
143
144        let comp_len = self.components.len();
145
146        for (pos, comp) in self.components.iter_mut().enumerate() {
147            // Allocate only needed components.
148            //
149            // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
150            // components.
151            if min(
152                self.options.jpeg_get_out_colorspace().num_components() - 1,
153                pos
154            ) == pos
155                || comp_len == 4
156            // Special colorspace
157            {
158                // allocate enough space to hold a whole MCU width
159                // this means we should take into account sampling ratios
160                // `*8` is because each MCU spans 8 widths.
161                let len = comp.width_stride * comp.vertical_sample * 8;
162
163                comp.needed = true;
164                comp.raw_coeff = vec![0; len];
165            } else {
166                comp.needed = false;
167            }
168        }
169
170        // If all components are contained in the first scan of MCUs, then we can process into
171        // (upsampled) pixels immediately after each MCU, for convenience we use each row of MCUS.
172        // Otherwise, we must first wait until following SOS provide the remaining components.
173        let all_components_in_first_scan = usize::from(self.num_scans) == self.components.len();
174        let mut progressive_mcus: [Vec<i16>; 4] = core::array::from_fn(|_| vec![]);
175
176        if !all_components_in_first_scan {
177            for (component, mcu) in self.components.iter().zip(&mut progressive_mcus) {
178                let len = mcu_width
179                    * component.vertical_sample
180                    * component.horizontal_sample
181                    * mcu_height
182                    * 64;
183                *mcu = vec![0; len];
184            }
185        }
186
187        let mut pixels_written = 0;
188
189        let is_hv = usize::from(self.is_interleaved);
190        let upsampler_scratch_size = is_hv * self.components.iter().map(|x| x.width_stride).max().unwrap_or(0) * 8;
191        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
192
193        'sos: loop {
194            trace!(
195                "Baseline decoding of components: {:?}",
196                &self.z_order[..usize::from(self.num_scans)]
197            );
198
199            trace!("Decoding MCU width: {mcu_width}, height: {mcu_height}");
200
201            for i in 0..mcu_height {
202                if stream.overread_by > 0 {
203                    pixels.get_mut(pixels_written..).map(|v| v.fill(128));
204                    if self.options.strict_mode() {
205                        return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
206                    };
207
208                    error!("Premature end of buffer");
209                    break;
210                }
211
212                // decode a whole MCU width,
213                // this takes into account interleaved components.
214                let terminate = if all_components_in_first_scan {
215                    self.decode_mcu_width::<false>(
216                        mcu_width,
217                        i,
218                        &mut tmp,
219                        &mut stream,
220                        &mut progressive_mcus
221                    )?
222                } else {
223                    /* NB: (cae). This code was added due to the issue at https://github.com/etemesi254/zune-image/issues/277
224                    *
225                    * There is a particular set of images that interleave the start of scan (SOS) with the MCU,
226                    * E.g if it's a three component image, we have SOS->MCU ->SOS->MCU ->SOS->MCU
227                    * which presents a problem on decoding, we need to buffer the whole image before continuing since
228                    * we won't have a row containing all the component data which will be needed e.g for color conversion.
229                    *
230                    * The mechanisms is that we decode the whole image upfront, which goes against the normal
231                    * routine of decoding MCU width , so this requires more memory upfront than initial routines
232                    * but it is a single image out of the many corpuses that exist, so its fine.
233                    * (image in test-images/jpeg/sos_news.jpeg)
234
235                    * Code contributed by  Aurelia Molzer (https://github.com/197g)
236
237                    *
238                    */
239
240                    self.decode_mcu_width::<true>(
241                        mcu_width,
242                        i,
243                        &mut tmp,
244                        &mut stream,
245                        &mut progressive_mcus
246                    )?
247                };
248
249                // process that width up until it's impossible. This is faster than allocation the
250                // full components, which we skipped earlier.
251                if all_components_in_first_scan {
252                    self.post_process(
253                        pixels,
254                        i,
255                        mcu_height,
256                        width,
257                        padded_width,
258                        &mut pixels_written,
259                        &mut upsampler_scratch_space
260                    )?;
261                }
262
263                match terminate {
264                    McuContinuation::Ok => {}
265                    McuContinuation::AnotherSos if all_components_in_first_scan => {
266                        warn!("More than one SOS despite already having all components");
267                        return Ok(());
268                    }
269                    McuContinuation::AnotherSos => continue 'sos,
270                    McuContinuation::InterScanMarker(marker) => {
271                        // Handle inter-scan markers (DHT/DQT/etc) uniformly here.
272                        // This keeps all marker handling in the outer loop.
273                        if self.advance_to_next_sos(marker, &mut stream)? {
274                            continue 'sos;
275                        } else {
276                            // Hit EOI
277                            break;
278                        }
279                    }
280                    McuContinuation::Terminate => {
281                        warn!("Got terminate signal, will not process further");
282                        pixels.get_mut(pixels_written..).map(|v| v.fill(128));
283                        return Ok(());
284                    }
285                }
286            }
287
288            // Breaks if we get here, looping only if we have restarted, i.e. found another SOS and
289            // continued at `'sos'.
290            break;
291        }
292
293        if !all_components_in_first_scan {
294            self.finish_baseline_decoding(&progressive_mcus, mcu_width, pixels)?;
295        }
296
297        // it may happen that some images don't have the whole buffer
298        // so we can't panic in case of that
299        // assert_eq!(pixels_written, pixels.len());
300
301        // For UHD usecases that tie two images separating them with EOI and
302        // SOI markers, it may happen that we do not reach this image end of image
303        // So this ensures we reach it
304        // Ensure we read EOI
305        if !stream.seen_eoi {
306            let marker = get_marker(&mut self.stream, &mut stream);
307            match marker {
308                Ok(_m) => {
309                    trace!("Found marker {:?}", _m);
310                }
311                Err(_) => {
312                    // ignore error
313                }
314            }
315        }
316
317        trace!("Finished decoding image");
318
319        Ok(())
320    }
321
322    /// Process all MCUs when baseline decoding has been processing them component-after-component.
323    /// For simplicity this assembles the dequantized blocks in the order that the post processing
324    /// of an interleaved baseline decoding would use.
325    #[allow(clippy::too_many_lines)]
326    #[allow(clippy::cast_sign_loss)]
327    pub(crate) fn finish_baseline_decoding(
328        &mut self, block: &[Vec<i16>; MAX_COMPONENTS], _mcu_width: usize, pixels: &mut [u8]
329    ) -> Result<(), DecodeErrors> {
330        let mcu_height = self.mcu_y;
331
332        // Size of our output image(width*height)
333        let is_hv = usize::from(self.is_interleaved);
334        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
335        let width = usize::from(self.info.width);
336        let padded_width = calculate_padded_width(width, self.info.sample_ratio);
337
338        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
339
340        for (pos, comp) in self.components.iter_mut().enumerate() {
341            // Mark only needed components for computing output colors.
342            if min(
343                self.options.jpeg_get_out_colorspace().num_components() - 1,
344                pos
345            ) == pos
346                || self.input_colorspace == ColorSpace::YCCK
347                || self.input_colorspace == ColorSpace::CMYK
348            {
349                comp.needed = true;
350            } else {
351                comp.needed = false;
352            }
353        }
354
355        let mut pixels_written = 0;
356
357        // dequantize and idct have been performed, only color convert.
358        for i in 0..mcu_height {
359            // All the data is already in the right order, we just need to be able to pass it to
360            // the post_process & upsample method. That expects all the data to be stored as one
361            // row of MCUs in each component's `raw_coeff`.
362            'component: for (position, component) in &mut self.components.iter_mut().enumerate() {
363                if !component.needed {
364                    continue 'component;
365                }
366
367                // step is the number of pixels this iteration wil be handling
368                // Given by the number of mcu's height and the length of the component block
369                // Since the component block contains the whole channel as raw pixels
370                // we this evenly divides the pixels into MCU blocks
371                //
372                // For interleaved images, this gives us the exact pixels comprising a whole MCU
373                // block
374                let step = block[position].len() / mcu_height;
375
376                // where we will be reading our pixels from.
377                let slice = &block[position][i * step..][..step];
378                let temp_channel = &mut component.raw_coeff;
379                temp_channel[..step].copy_from_slice(slice);
380            }
381
382            // process that whole stripe of MCUs
383            self.post_process(
384                pixels,
385                i,
386                mcu_height,
387                width,
388                padded_width,
389                &mut pixels_written,
390                &mut upsampler_scratch_space
391            )?;
392        }
393
394        return Ok(());
395    }
396
397    fn decode_mcu_width<const PROGRESSIVE: bool>(
398        &mut self, mcu_width: usize, mcu_height: usize, tmp: &mut [i32; 64],
399        stream: &mut BitStream, progressive: &mut [Vec<i16>; 4]
400    ) -> Result<McuContinuation, DecodeErrors> {
401        let is_one_by_one = !self.scan_subsampled;
402
403        // The definition of MCU depends on the sampling factor of involved scans. When components
404        // have different factors then each Minimal-Coding-Unit is the least common multiple such
405        // that we have an integer number of blocks from each component. But the decoding of these
406        // components differs from it otherwise, we need an inner loop with a dynamic amount of
407        // coefficients per component, whereas otherwise we have exactly one block of coefficients
408        // encoded for each component in the bitstream order.
409        //
410        // We statically specialize on this to improve code generation of the common case a little
411        // bit. We could also special case common sub-sampling cases but be mindful of code bloat.
412        if is_one_by_one {
413            self.inner_decode_mcu_width::<PROGRESSIVE, false>(
414                mcu_width,
415                mcu_height,
416                tmp,
417                stream,
418                progressive
419            )
420        } else {
421            self.inner_decode_mcu_width::<PROGRESSIVE, true>(
422                mcu_width,
423                mcu_height,
424                tmp,
425                stream,
426                progressive
427            )
428        }
429    }
430
431    // Inline-never ensures we do get this function optimize on its own, into two different
432    // versions, without the optimizer tripping up over the complexity that comes with the
433    // constant folding. And constant folding is quite important for performance here as
434    // when `not SAMPLED` then the inner loop has exactly one iteration per component in
435    // the scan. The difference was ~1% or a bit more.
436    fn inner_decode_mcu_width<const PROGRESSIVE: bool, const SAMPLED: bool>(
437        &mut self, mcu_width: usize, mcu_height: usize, tmp: &mut [i32; 64],
438        stream: &mut BitStream, progressive: &mut [Vec<i16>; 4]
439    ) -> Result<McuContinuation, DecodeErrors> {
440        let z_order = self.z_order;
441        let z_scans = &z_order[..usize::from(self.num_scans)];
442
443        // How much of the head of `tmp` was written by the last MCU decoding? We only check for
444        // two different cases and not all possible outcomes as this is only used to optimize the
445        // bytes written in `fill`. Since the clobber happens in UNZIGZAG order we'd be straddling
446        // most cache lines anyways even if we did a partial write with the exact length of the
447        // coefficient data which was written into `tmp`.
448        let mut clobber_more_than_4x4 = true;
449
450        // For non-interleaved scans (PROGRESSIVE=true), each scan contains a single component
451        // and we iterate over that component's actual data unit count, not the interleaved MCU
452        // width multiplied by sampling factor.
453        let mut scan_du_width = if PROGRESSIVE {
454            let k = z_scans[0];
455            let comp = &self.components[k];
456            // Calculate actual data units for this component: ceil(width / (8 * subsampling_ratio))
457            (self.info.width as usize * comp.horizontal_sample + self.h_max * 8 - 1)
458                / (self.h_max * 8)
459        } else {
460            mcu_width
461        };
462        // In malformed scans that list multiple components, clamp to the smallest row capacity
463        // to avoid writing past the row buffer.
464        if PROGRESSIVE && z_scans.len() > 1 {
465            let min_du = z_scans
466                .iter()
467                .map(|&k| self.components[k].width_stride / 8)
468                .min()
469                .unwrap_or(0);
470            scan_du_width = scan_du_width.min(min_du);
471        }
472
473        for j in 0..scan_du_width {
474            // iterate over components
475            for &k in z_scans {
476                // we made this loop body massive due to several different paths that depend on
477                // static conditions. Note we (potentially) call into other functions so the
478                // compiler will not unroll anything here anyways. The gains from separating
479                // differently optimized loop bodies are much greater than a single additional jump
480                // here.
481                let component = &mut self.components[k];
482
483                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
484                    .as_ref()
485                    .ok_or(DecodeErrors::FormatStatic("DC table not found"))?;
486
487                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
488                    .as_ref()
489                    .ok_or(DecodeErrors::FormatStatic("AC table not found"))?;
490
491                let qt_table = &component.quantization_table;
492                let channel = if PROGRESSIVE {
493                    let offset =
494                        mcu_height * component.width_stride * 8 * component.vertical_sample;
495                    // Small stopgap for https://github.com/etemesi254/zune-image/issues/362
496                    if offset >= progressive[k].len(){
497                        return Err(DecodeErrors::FormatStatic("Would panic on slice iteration"))
498                    }
499                    &mut progressive[k][offset..]
500                } else {
501                    &mut component.raw_coeff
502                };
503
504                let component_samples_needed = component.needed;
505
506                // If image is interleaved iterate over scan components,
507                // otherwise if it-s non-interleaved, these routines iterate in
508                // trivial scanline order(Y,Cb,Cr)
509                //
510                // Turn the bounds into a compile time constant for a common special case. This
511                // allows the compiler to unroll the loop and then do a bunch of interleaving.
512                //
513                // For PROGRESSIVE (non-interleaved), we iterate data units directly so
514                // h_samp/v_samp loops run exactly once.
515                let v_step =
516                    if SAMPLED && !PROGRESSIVE { 0..component.vertical_sample } else { 0..1 };
517
518                for v_samp in v_step {
519                    let h_step =
520                        if SAMPLED && !PROGRESSIVE { 0..component.horizontal_sample } else { 0..1 };
521
522                    for h_samp in h_step {
523                        let result = if component_samples_needed {
524                            // Fill the array with zeroes, decode_mcu_block expects
525                            // a zero based array. Clobber is in zig-zag order though.
526                            // Writing consecutive entries is basically free in terms
527                            // of memory throughput so we opt for a larger power of
528                            // two which lets the compiler turn this into a repeated
529                            // write of a zeroed vector register, which does not have
530                            // any branches, instead of a more difficult pattern where
531                            // we attempt to overwrite exactly one coefficient.
532                            let clobber_len = if !clobber_more_than_4x4 { 32 } else { 64 };
533
534                            tmp[..clobber_len].fill(0);
535
536                            stream.decode_mcu_block(
537                                &mut self.stream,
538                                dc_table,
539                                ac_table,
540                                qt_table,
541                                tmp,
542                                &mut component.dc_pred
543                            )
544                        } else {
545                            // We do not touch tmp so there is no need to reset it.
546                            stream.discard_mcu_block(&mut self.stream, dc_table, ac_table)
547                        };
548
549                        // If an error occurs we can either propagate it
550                        // as an error or print it and call terminate.
551                        //
552                        // This allows even corrupt images to render something,
553                        // even if its bad, matching browsers.
554                        //
555                        // See example in https://github.com/etemesi254/zune-image/issues/293
556                        let len = if let Ok(len) = result {
557                            len
558                        } else {
559                            // result.is_err()
560                            return if self.options.strict_mode() {
561                                Err(result.err().unwrap())
562                            } else {
563                                error!("{}", result.err().unwrap());
564                                Ok(McuContinuation::Terminate)
565                            };
566                        };
567
568                        if component_samples_needed {
569                            // tmp was only written partially, note that len is in ZigZag order.
570                            clobber_more_than_4x4 = len > 10;
571
572                            let idct_position = if PROGRESSIVE {
573                                // For non-interleaved, j indexes data units directly
574                                j * 8
575                            } else {
576                                // derived from stb and rewritten for my tastes
577                                let c2 = v_samp * 8;
578                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;
579
580                                component.width_stride * c2 + c3
581                            };
582
583                            let idct_pos = channel.get_mut(idct_position..).unwrap();
584
585                            if len <= 1 {
586                                (self.idct_1x1_func)(tmp, idct_pos, component.width_stride);
587                            } else if len <= 10 {
588                                (self.idct_4x4_func)(tmp, idct_pos, component.width_stride);
589                            } else {
590                                //  call idct.
591                                (self.idct_func)(tmp, idct_pos, component.width_stride);
592                            }
593                        }
594                    }
595                }
596            }
597
598            self.todo = self.todo.wrapping_sub(1);
599
600            if self.todo == 0 {
601                self.handle_rst_main(stream)?;
602                continue;
603            }
604
605            if stream.marker.is_some() && stream.bits_left == 0 {
606                break;
607            }
608        }
609
610        self.check_stream_marker_after_mcu_width(stream)
611    }
612
613    fn check_stream_marker_after_mcu_width(
614        &mut self, stream: &mut BitStream
615    ) -> Result<McuContinuation, DecodeErrors> {
616        // After all interleaved components, that's an MCU
617        // handle stream markers
618        //
619        // In some corrupt images, it may occur that header markers occur in the stream.
620        // The spec EXPLICITLY FORBIDS this, specifically, in
621        // routine F.2.2.5  it says
622        // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
623        //
624        // But libjpeg-turbo allows it because of some weird reason. so I'll also
625        // allow it because of some weird reason.
626        if let Some(m) = stream.marker {
627            if m == Marker::EOI {
628                // acknowledge and ignore EOI marker.
629                stream.marker.take();
630                trace!("Found EOI marker");
631                // Google Introduced the Ultra-HD image format which is basically
632                // stitching two images into one container.
633                // They basically separate two images via a EOI and SOI marker
634                // so let's just ensure if we ever see EOI, we never read past that
635                // ever.
636                // https://github.com/google/libultrahdr
637                stream.seen_eoi = true;
638            } else if let Marker::RST(_) = m {
639                //debug_assert_eq!(self.todo, 0);
640                if self.todo == 0 {
641                    self.handle_rst(stream)?;
642                }
643            } else if let Marker::SOS = m {
644                self.parse_marker_inner(m)?;
645                stream.marker.take();
646                stream.reset();
647                trace!("Found SOS marker");
648                return Ok(McuContinuation::AnotherSos);
649            } else if matches!(m, Marker::DHT | Marker::DQT | Marker::DRI | Marker::COM)
650                || matches!(m, Marker::APP(_))
651            {
652                // For non-interleaved images, setup markers can appear between scans.
653                // Signal the caller to handle this marker and find the next SOS.
654                // This keeps all marker parsing in the caller's loop.
655                stream.marker.take();
656                trace!("Found inter-scan marker {:?}", m);
657                return Ok(McuContinuation::InterScanMarker(m));
658            } else {
659                if self.options.strict_mode() {
660                    return Err(DecodeErrors::Format(format!(
661                        "Marker {m:?} found where not expected"
662                    )));
663                }
664                error!(
665                    "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
666                    m
667                );
668
669                self.parse_marker_inner(m)?;
670                stream.marker.take();
671                stream.reset();
672                return Ok(McuContinuation::Terminate);
673            }
674        }
675
676        Ok(McuContinuation::Ok)
677    }
678
679    /// Scan for the next SOS marker, parsing setup markers along the way.
680    ///
681    /// This is the unified marker scanning function used after encountering an
682    /// inter-scan marker. It handles DHT, DQT, DRI, COM, and APP markers that
683    /// can appear between scans in non-interleaved images.
684    ///
685    /// # Arguments
686    /// * `first_marker` - The first marker that was already detected (not yet parsed)
687    /// * `stream` - The bitstream state
688    ///
689    /// # Returns
690    /// * `Ok(true)` - Found SOS, ready to continue decoding
691    /// * `Ok(false)` - Found EOI, decoding complete
692    /// * `Err(_)` - Error (too many markers, unexpected marker in strict mode, etc.)
693    fn advance_to_next_sos(
694        &mut self,
695        first_marker: Marker,
696        stream: &mut BitStream
697    ) -> Result<bool, DecodeErrors> {
698        // Limit iterations to prevent DoS from malicious files.
699        const MAX_INTER_SCAN_MARKERS: usize = 64;
700
701        // Parse the first marker that triggered this call
702        self.parse_marker_inner(first_marker)?;
703        stream.reset();
704
705        for _ in 0..MAX_INTER_SCAN_MARKERS {
706            let marker = get_marker(&mut self.stream, stream)?;
707
708            match marker {
709                Marker::SOS => {
710                    self.parse_marker_inner(Marker::SOS)?;
711                    stream.reset();
712                    trace!("Found SOS marker, continuing decode");
713                    return Ok(true);
714                }
715                Marker::EOI => {
716                    stream.seen_eoi = true;
717                    trace!("Found EOI marker");
718                    return Ok(false);
719                }
720                Marker::DHT | Marker::DQT | Marker::DRI | Marker::COM => {
721                    trace!("Parsing inter-scan marker {:?}", marker);
722                    self.parse_marker_inner(marker)?;
723                }
724                Marker::APP(_) => {
725                    trace!("Parsing inter-scan APP marker {:?}", marker);
726                    self.parse_marker_inner(marker)?;
727                }
728                other => {
729                    if self.options.strict_mode() {
730                        return Err(DecodeErrors::Format(format!(
731                            "Unexpected marker {:?} while scanning for SOS between scans",
732                            other
733                        )));
734                    }
735                    // Non-strict: skip unknown marker
736                    warn!("Skipping unexpected marker {:?} between scans", other);
737                    let length = self.stream.get_u16_be_err()?;
738                    if length >= 2 {
739                        self.stream.skip((length - 2) as usize)?;
740                    }
741                }
742            }
743        }
744
745        Err(DecodeErrors::FormatStatic(
746            "Too many markers between scans (exceeded limit of 64)"
747        ))
748    }
749
750    // handle RST markers.
751    // No-op if not using restarts
752    // this routine is shared with mcu_prog
753    #[cold]
754    pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
755        self.todo = self.restart_interval;
756
757        if let Some(marker) = stream.marker {
758            // Found a marker
759            // Read stream and see what marker is stored there
760            match marker {
761                Marker::RST(_) => {
762                    // reset stream
763                    stream.reset();
764                    // Initialize dc predictions to zero for all components
765                    self.components.iter_mut().for_each(|x| x.dc_pred = 0);
766                    // Start iterating again. from position.
767                }
768                Marker::EOI => {
769                    // silent pass
770                }
771                // Valid markers that can appear between scans at a restart boundary
772                // (restart interval aligns with end of scan). Leave for caller.
773                Marker::SOS | Marker::DHT | Marker::DQT | Marker::DRI | Marker::COM
774                | Marker::APP(_) => {}
775                _ => {
776                    if self.options.strict_mode() {
777                        return Err(DecodeErrors::MCUError(format!(
778                            "Unexpected marker {marker:?} at restart boundary"
779                        )));
780                    }
781                    warn!("Unexpected marker {:?} at restart boundary", marker);
782                }
783            }
784        }
785        Ok(())
786    }
787    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
788    pub(crate) fn post_process(
789        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
790        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
791    ) -> Result<(), DecodeErrors> {
792        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();
793
794        let mut px = *pixels_written;
795        // indicates whether image is vertically up-sampled
796        let is_vertically_sampled = self
797            .components
798            .iter()
799            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);
800
801        let mut comp_len = self.components.len();
802
803        // If we are moving from YCbCr -> Luma, we do not allocate storage for other components, so we
804        // will panic when we are trying to read samples, so for that case,
805        // hardcode it so that we  don't panic when doing
806        //   *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
807        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
808            comp_len = out_colorspace_components;
809        }
810        let mut color_conv_function =
811            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
812                for (pos, output) in pixels[px..]
813                    .chunks_exact_mut(width * out_colorspace_components)
814                    .take(num_iters)
815                    .enumerate()
816                {
817                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];
818
819                    // iterate over each line, since color-convert needs only
820                    // one line
821                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
822                        let temp = &samples[j].get(pos * padded_width..(pos + 1) * padded_width);
823                        if temp.is_none() {
824                            return Err(DecodeErrors::FormatStatic("Missing samples"));
825                        }
826                        *samp = temp.unwrap();
827                    }
828                    color_convert(
829                        &raw_samples,
830                        self.color_convert_16,
831                        self.input_colorspace,
832                        self.options.jpeg_get_out_colorspace(),
833                        output,
834                        width,
835                        padded_width
836                    )?;
837                    px += width * out_colorspace_components;
838                }
839                Ok(())
840            };
841
842        let comps = &mut self.components[..];
843
844        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
845            for comp in comps.iter_mut() {
846                upsample(
847                    comp,
848                    mcu_height,
849                    i,
850                    upsampler_scratch_space,
851                    is_vertically_sampled
852                )?;
853            }
854
855            if is_vertically_sampled {
856                if i > 0 {
857                    // write the last line, it wasn't  up-sampled as we didn't have row_down
858                    // yet
859                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
860
861                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
862                        *samp = &component.first_row_upsample_dest;
863                    }
864
865                    // ensure length matches for all samples
866                    let _first_len = samples[0].len();
867
868                    // This was a good check, but can be caused to panic, esp on invalid/corrupt images.
869                    // See one in issue https://github.com/etemesi254/zune-image/issues/262, so for now
870                    // we just ignore and generate invalid images at the end.
871
872                    //
873                    //
874                    // for samp in samples.iter().take(comp_len) {
875                    //     assert_eq!(first_len, samp.len());
876                    // }
877                    let num_iters = self.coeff * self.v_max;
878
879                    color_conv_function(num_iters, samples)?;
880                }
881
882                // After up-sampling the last row, save  any row that can be used for
883                // a later up-sampling,
884                //
885                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
886                // the previous mcu, since we don't have the down row, so save it
887                for component in comps.iter_mut() {
888                    if component.sample_ratio != SampleRatios::H {
889                        // We don't care about H sampling factors, since it's copied in the workers function
890
891                        // copy last row to be used for the  next color conversion
892                        let size = component.vertical_sample
893                            * component.width_stride
894                            * component.sample_ratio.sample();
895
896                        let last_bytes =
897                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();
898
899                        component
900                            .first_row_upsample_dest
901                            .copy_from_slice(last_bytes);
902                    }
903                }
904            }
905
906            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
907
908            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
909                *samp = if component.sample_ratio == SampleRatios::None {
910                    &component.raw_coeff
911                } else {
912                    &component.upsample_dest
913                };
914            }
915
916            // we either do 7 or 8 MCU's depending on the state, this only applies to
917            // vertically sampled images
918            //
919            // for rows up until the last MCU, we do not upsample the last stride of the MCU
920            // which means that the number of iterations should take that into account is one less the
921            // up-sampled size
922            //
923            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
924            // should sample full raw coeffs
925            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));
926
927            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;
928
929            color_conv_function(num_iters, samples)?;
930        } else {
931            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];
932
933            self.components
934                .iter()
935                .enumerate()
936                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);
937
938            if let SampleRatios::Generic(_, v) = self.info.sample_ratio {
939                color_conv_function(8 * v * self.coeff, channels_ref)?;
940            } else {
941                color_conv_function(8 * self.coeff, channels_ref)?;
942            }
943        }
944
945        *pixels_written = px;
946        Ok(())
947    }
948}
949
/// Tells the caller of the MCU marker-handling code how decoding should proceed.
enum McuContinuation {
    /// No special handling is required.
    Ok,
    /// NOTE(review): presumably signals that another SOS marker was found; the code
    /// producing this variant is not visible in this part of the file, confirm there.
    AnotherSos,
    /// Found an inter-scan marker (DHT/DQT/DRI/COM/APP) that needs handling.
    /// The caller should parse it and scan for the next SOS.
    InterScanMarker(Marker),
    /// Returned, for example, after an unexpected marker was parsed in non-strict mode.
    /// Presumably tells the caller to stop decoding the current scan (confirm with caller).
    Terminate
}