zune_jpeg/mcu.rs
1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9use alloc::vec::Vec;
10use alloc::{format, vec};
11use core::cmp::min;
12
13use zune_core::bytestream::ZByteReaderTrait;
14use zune_core::colorspace::ColorSpace;
15use zune_core::colorspace::ColorSpace::Luma;
16use zune_core::log::{error, trace, warn};
17
18use crate::bitstream::BitStream;
19use crate::components::SampleRatios;
20use crate::decoder::MAX_COMPONENTS;
21use crate::errors::DecodeErrors;
22use crate::marker::Marker;
23use crate::mcu_prog::get_marker;
24use crate::misc::{calculate_padded_width, setup_component_params};
25use crate::worker::{color_convert, upsample};
26use crate::JpegDecoder;
27
/// Number of coefficients in a single DCT block.
///
/// The baseline decoder uses this to size the per-block coefficient scratch
/// array (`[0_i32; DCT_BLOCK]`) that is handed to the block decoder and IDCT.
pub const DCT_BLOCK: usize = 64;
31
32impl<T: ZByteReaderTrait> JpegDecoder<T> {
33 /// Check for existence of DC and AC Huffman Tables
34 pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
35 // check that dc and AC tables exist outside the hot path
36 for component in &self.components {
37 let _ = &self
38 .dc_huffman_tables
39 .get(component.dc_huff_table)
40 .as_ref()
41 .ok_or_else(|| {
42 DecodeErrors::HuffmanDecode(format!(
43 "No Huffman DC table for component {:?} ",
44 component.component_id
45 ))
46 })?
47 .as_ref()
48 .ok_or_else(|| {
49 DecodeErrors::HuffmanDecode(format!(
50 "No DC table for component {:?}",
51 component.component_id
52 ))
53 })?;
54
55 let _ = &self
56 .ac_huffman_tables
57 .get(component.ac_huff_table)
58 .as_ref()
59 .ok_or_else(|| {
60 DecodeErrors::HuffmanDecode(format!(
61 "No Huffman AC table for component {:?} ",
62 component.component_id
63 ))
64 })?
65 .as_ref()
66 .ok_or_else(|| {
67 DecodeErrors::HuffmanDecode(format!(
68 "No AC table for component {:?}",
69 component.component_id
70 ))
71 })?;
72 }
73 Ok(())
74 }
75
76 /// Decode MCUs and carry out post processing.
77 ///
78 /// This is the main decoder loop for the library, the hot path.
79 ///
80 /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
81 /// here.
82 #[allow(
83 clippy::similar_names,
84 clippy::too_many_lines,
85 clippy::cast_possible_truncation
86 )]
87 #[inline(never)]
88 pub(crate) fn decode_mcu_ycbcr_baseline(
89 &mut self, pixels: &mut [u8]
90 ) -> Result<(), DecodeErrors> {
91 setup_component_params(self)?;
92
93 // check dc and AC tables
94 self.check_tables()?;
95
96 let (mut mcu_width, mut mcu_height);
97
98 if self.is_interleaved {
99 // set upsampling functions
100 self.set_upsampling()?;
101
102 mcu_width = self.mcu_x;
103 mcu_height = self.mcu_y;
104 } else {
105 // For non-interleaved images( (1*1) subsampling)
106 // number of MCU's are the widths (+7 to account for paddings) divided bu 8.
107 mcu_width = (self.info.width as usize + 7) / 8;
108 mcu_height = (self.info.height as usize + 7) / 8;
109 }
110 if self.is_interleaved
111 && self.input_colorspace.num_components() > 1
112 && self.options.jpeg_get_out_colorspace().num_components() == 1
113 && (self.info.sample_ratio == SampleRatios::V
114 || self.info.sample_ratio == SampleRatios::HV)
115 {
116 // For a specific set of images, e.g interleaved,
117 // when converting from YcbCr to grayscale, we need to
118 // take into account mcu height since the MCU decoding needs to take
119 // it into account for padding purposes and the post processor
120 // parses two rows per mcu width.
121 //
122 // set coeff to be 2 to ensure that we increment two rows
123 // for every mcu processed also
124 mcu_height *= self.v_max;
125 mcu_height /= self.h_max;
126 self.coeff = 2;
127 }
128
129 if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
130 warn!("Grayscale image with down-sampled component, resetting component details");
131
132 self.reset_params();
133
134 mcu_width = ((self.info.width + 7) / 8) as usize;
135 mcu_height = ((self.info.height + 7) / 8) as usize;
136 }
137 let width = usize::from(self.info.width);
138
139 let padded_width = calculate_padded_width(width, self.info.sample_ratio);
140
141 let mut stream = BitStream::new();
142 let mut tmp = [0_i32; DCT_BLOCK];
143
144 let comp_len = self.components.len();
145
146 for (pos, comp) in self.components.iter_mut().enumerate() {
147 // Allocate only needed components.
148 //
149 // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
150 // components.
151 if min(
152 self.options.jpeg_get_out_colorspace().num_components() - 1,
153 pos
154 ) == pos
155 || comp_len == 4
156 // Special colorspace
157 {
158 // allocate enough space to hold a whole MCU width
159 // this means we should take into account sampling ratios
160 // `*8` is because each MCU spans 8 widths.
161 let len = comp.width_stride * comp.vertical_sample * 8;
162
163 comp.needed = true;
164 comp.raw_coeff = vec![0; len];
165 } else {
166 comp.needed = false;
167 }
168 }
169
170 // If all components are contained in the first scan of MCUs, then we can process into
171 // (upsampled) pixels immediately after each MCU, for convenience we use each row of MCUS.
172 // Otherwise, we must first wait until following SOS provide the remaining components.
173 let all_components_in_first_scan = usize::from(self.num_scans) == self.components.len();
174 let mut progressive_mcus: [Vec<i16>; 4] = core::array::from_fn(|_| vec![]);
175
176 if !all_components_in_first_scan {
177 for (component, mcu) in self.components.iter().zip(&mut progressive_mcus) {
178 let len = mcu_width
179 * component.vertical_sample
180 * component.horizontal_sample
181 * mcu_height
182 * 64;
183 *mcu = vec![0; len];
184 }
185 }
186
187 let mut pixels_written = 0;
188
189 let is_hv = usize::from(self.is_interleaved);
190 let upsampler_scratch_size = is_hv * self.components.iter().map(|x| x.width_stride).max().unwrap_or(0) * 8;
191 let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
192
193 'sos: loop {
194 trace!(
195 "Baseline decoding of components: {:?}",
196 &self.z_order[..usize::from(self.num_scans)]
197 );
198
199 trace!("Decoding MCU width: {mcu_width}, height: {mcu_height}");
200
201 for i in 0..mcu_height {
202 if stream.overread_by > 0 {
203 pixels.get_mut(pixels_written..).map(|v| v.fill(128));
204 if self.options.strict_mode() {
205 return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
206 };
207
208 error!("Premature end of buffer");
209 break;
210 }
211
212 // decode a whole MCU width,
213 // this takes into account interleaved components.
214 let terminate = if all_components_in_first_scan {
215 self.decode_mcu_width::<false>(
216 mcu_width,
217 i,
218 &mut tmp,
219 &mut stream,
220 &mut progressive_mcus
221 )?
222 } else {
223 /* NB: (cae). This code was added due to the issue at https://github.com/etemesi254/zune-image/issues/277
224 *
225 * There is a particular set of images that interleave the start of scan (SOS) with the MCU,
226 * E.g if it's a three component image, we have SOS->MCU ->SOS->MCU ->SOS->MCU
227 * which presents a problem on decoding, we need to buffer the whole image before continuing since
228 * we won't have a row containing all the component data which will be needed e.g for color conversion.
229 *
230 * The mechanisms is that we decode the whole image upfront, which goes against the normal
231 * routine of decoding MCU width , so this requires more memory upfront than initial routines
232 * but it is a single image out of the many corpuses that exist, so its fine.
233 * (image in test-images/jpeg/sos_news.jpeg)
234
235 * Code contributed by Aurelia Molzer (https://github.com/197g)
236
237 *
238 */
239
240 self.decode_mcu_width::<true>(
241 mcu_width,
242 i,
243 &mut tmp,
244 &mut stream,
245 &mut progressive_mcus
246 )?
247 };
248
249 // process that width up until it's impossible. This is faster than allocation the
250 // full components, which we skipped earlier.
251 if all_components_in_first_scan {
252 self.post_process(
253 pixels,
254 i,
255 mcu_height,
256 width,
257 padded_width,
258 &mut pixels_written,
259 &mut upsampler_scratch_space
260 )?;
261 }
262
263 match terminate {
264 McuContinuation::Ok => {}
265 McuContinuation::AnotherSos if all_components_in_first_scan => {
266 warn!("More than one SOS despite already having all components");
267 return Ok(());
268 }
269 McuContinuation::AnotherSos => continue 'sos,
270 McuContinuation::InterScanMarker(marker) => {
271 // Handle inter-scan markers (DHT/DQT/etc) uniformly here.
272 // This keeps all marker handling in the outer loop.
273 if self.advance_to_next_sos(marker, &mut stream)? {
274 continue 'sos;
275 } else {
276 // Hit EOI
277 break;
278 }
279 }
280 McuContinuation::Terminate => {
281 warn!("Got terminate signal, will not process further");
282 pixels.get_mut(pixels_written..).map(|v| v.fill(128));
283 return Ok(());
284 }
285 }
286 }
287
288 // Breaks if we get here, looping only if we have restarted, i.e. found another SOS and
289 // continued at `'sos'.
290 break;
291 }
292
293 if !all_components_in_first_scan {
294 self.finish_baseline_decoding(&progressive_mcus, mcu_width, pixels)?;
295 }
296
297 // it may happen that some images don't have the whole buffer
298 // so we can't panic in case of that
299 // assert_eq!(pixels_written, pixels.len());
300
301 // For UHD usecases that tie two images separating them with EOI and
302 // SOI markers, it may happen that we do not reach this image end of image
303 // So this ensures we reach it
304 // Ensure we read EOI
305 if !stream.seen_eoi {
306 let marker = get_marker(&mut self.stream, &mut stream);
307 match marker {
308 Ok(_m) => {
309 trace!("Found marker {:?}", _m);
310 }
311 Err(_) => {
312 // ignore error
313 }
314 }
315 }
316
317 trace!("Finished decoding image");
318
319 Ok(())
320 }
321
322 /// Process all MCUs when baseline decoding has been processing them component-after-component.
323 /// For simplicity this assembles the dequantized blocks in the order that the post processing
324 /// of an interleaved baseline decoding would use.
325 #[allow(clippy::too_many_lines)]
326 #[allow(clippy::cast_sign_loss)]
327 pub(crate) fn finish_baseline_decoding(
328 &mut self, block: &[Vec<i16>; MAX_COMPONENTS], _mcu_width: usize, pixels: &mut [u8]
329 ) -> Result<(), DecodeErrors> {
330 let mcu_height = self.mcu_y;
331
332 // Size of our output image(width*height)
333 let is_hv = usize::from(self.is_interleaved);
334 let upsampler_scratch_size = is_hv * self.components[0].width_stride;
335 let width = usize::from(self.info.width);
336 let padded_width = calculate_padded_width(width, self.info.sample_ratio);
337
338 let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
339
340 for (pos, comp) in self.components.iter_mut().enumerate() {
341 // Mark only needed components for computing output colors.
342 if min(
343 self.options.jpeg_get_out_colorspace().num_components() - 1,
344 pos
345 ) == pos
346 || self.input_colorspace == ColorSpace::YCCK
347 || self.input_colorspace == ColorSpace::CMYK
348 {
349 comp.needed = true;
350 } else {
351 comp.needed = false;
352 }
353 }
354
355 let mut pixels_written = 0;
356
357 // dequantize and idct have been performed, only color convert.
358 for i in 0..mcu_height {
359 // All the data is already in the right order, we just need to be able to pass it to
360 // the post_process & upsample method. That expects all the data to be stored as one
361 // row of MCUs in each component's `raw_coeff`.
362 'component: for (position, component) in &mut self.components.iter_mut().enumerate() {
363 if !component.needed {
364 continue 'component;
365 }
366
367 // step is the number of pixels this iteration wil be handling
368 // Given by the number of mcu's height and the length of the component block
369 // Since the component block contains the whole channel as raw pixels
370 // we this evenly divides the pixels into MCU blocks
371 //
372 // For interleaved images, this gives us the exact pixels comprising a whole MCU
373 // block
374 let step = block[position].len() / mcu_height;
375
376 // where we will be reading our pixels from.
377 let slice = &block[position][i * step..][..step];
378 let temp_channel = &mut component.raw_coeff;
379 temp_channel[..step].copy_from_slice(slice);
380 }
381
382 // process that whole stripe of MCUs
383 self.post_process(
384 pixels,
385 i,
386 mcu_height,
387 width,
388 padded_width,
389 &mut pixels_written,
390 &mut upsampler_scratch_space
391 )?;
392 }
393
394 return Ok(());
395 }
396
397 fn decode_mcu_width<const PROGRESSIVE: bool>(
398 &mut self, mcu_width: usize, mcu_height: usize, tmp: &mut [i32; 64],
399 stream: &mut BitStream, progressive: &mut [Vec<i16>; 4]
400 ) -> Result<McuContinuation, DecodeErrors> {
401 let is_one_by_one = !self.scan_subsampled;
402
403 // The definition of MCU depends on the sampling factor of involved scans. When components
404 // have different factors then each Minimal-Coding-Unit is the least common multiple such
405 // that we have an integer number of blocks from each component. But the decoding of these
406 // components differs from it otherwise, we need an inner loop with a dynamic amount of
407 // coefficients per component, whereas otherwise we have exactly one block of coefficients
408 // encoded for each component in the bitstream order.
409 //
410 // We statically specialize on this to improve code generation of the common case a little
411 // bit. We could also special case common sub-sampling cases but be mindful of code bloat.
412 if is_one_by_one {
413 self.inner_decode_mcu_width::<PROGRESSIVE, false>(
414 mcu_width,
415 mcu_height,
416 tmp,
417 stream,
418 progressive
419 )
420 } else {
421 self.inner_decode_mcu_width::<PROGRESSIVE, true>(
422 mcu_width,
423 mcu_height,
424 tmp,
425 stream,
426 progressive
427 )
428 }
429 }
430
431 // Inline-never ensures we do get this function optimize on its own, into two different
432 // versions, without the optimizer tripping up over the complexity that comes with the
433 // constant folding. And constant folding is quite important for performance here as
434 // when `not SAMPLED` then the inner loop has exactly one iteration per component in
435 // the scan. The difference was ~1% or a bit more.
436 fn inner_decode_mcu_width<const PROGRESSIVE: bool, const SAMPLED: bool>(
437 &mut self, mcu_width: usize, mcu_height: usize, tmp: &mut [i32; 64],
438 stream: &mut BitStream, progressive: &mut [Vec<i16>; 4]
439 ) -> Result<McuContinuation, DecodeErrors> {
440 let z_order = self.z_order;
441 let z_scans = &z_order[..usize::from(self.num_scans)];
442
443 // How much of the head of `tmp` was written by the last MCU decoding? We only check for
444 // two different cases and not all possible outcomes as this is only used to optimize the
445 // bytes written in `fill`. Since the clobber happens in UNZIGZAG order we'd be straddling
446 // most cache lines anyways even if we did a partial write with the exact length of the
447 // coefficient data which was written into `tmp`.
448 let mut clobber_more_than_4x4 = true;
449
450 // For non-interleaved scans (PROGRESSIVE=true), each scan contains a single component
451 // and we iterate over that component's actual data unit count, not the interleaved MCU
452 // width multiplied by sampling factor.
453 let mut scan_du_width = if PROGRESSIVE {
454 let k = z_scans[0];
455 let comp = &self.components[k];
456 // Calculate actual data units for this component: ceil(width / (8 * subsampling_ratio))
457 (self.info.width as usize * comp.horizontal_sample + self.h_max * 8 - 1)
458 / (self.h_max * 8)
459 } else {
460 mcu_width
461 };
462 // In malformed scans that list multiple components, clamp to the smallest row capacity
463 // to avoid writing past the row buffer.
464 if PROGRESSIVE && z_scans.len() > 1 {
465 let min_du = z_scans
466 .iter()
467 .map(|&k| self.components[k].width_stride / 8)
468 .min()
469 .unwrap_or(0);
470 scan_du_width = scan_du_width.min(min_du);
471 }
472
473 for j in 0..scan_du_width {
474 // iterate over components
475 for &k in z_scans {
476 // we made this loop body massive due to several different paths that depend on
477 // static conditions. Note we (potentially) call into other functions so the
478 // compiler will not unroll anything here anyways. The gains from separating
479 // differently optimized loop bodies are much greater than a single additional jump
480 // here.
481 let component = &mut self.components[k];
482
483 let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
484 .as_ref()
485 .ok_or(DecodeErrors::FormatStatic("DC table not found"))?;
486
487 let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
488 .as_ref()
489 .ok_or(DecodeErrors::FormatStatic("AC table not found"))?;
490
491 let qt_table = &component.quantization_table;
492 let channel = if PROGRESSIVE {
493 let offset =
494 mcu_height * component.width_stride * 8 * component.vertical_sample;
495 // Small stopgap for https://github.com/etemesi254/zune-image/issues/362
496 if offset >= progressive[k].len(){
497 return Err(DecodeErrors::FormatStatic("Would panic on slice iteration"))
498 }
499 &mut progressive[k][offset..]
500 } else {
501 &mut component.raw_coeff
502 };
503
504 let component_samples_needed = component.needed;
505
506 // If image is interleaved iterate over scan components,
507 // otherwise if it-s non-interleaved, these routines iterate in
508 // trivial scanline order(Y,Cb,Cr)
509 //
510 // Turn the bounds into a compile time constant for a common special case. This
511 // allows the compiler to unroll the loop and then do a bunch of interleaving.
512 //
513 // For PROGRESSIVE (non-interleaved), we iterate data units directly so
514 // h_samp/v_samp loops run exactly once.
515 let v_step =
516 if SAMPLED && !PROGRESSIVE { 0..component.vertical_sample } else { 0..1 };
517
518 for v_samp in v_step {
519 let h_step =
520 if SAMPLED && !PROGRESSIVE { 0..component.horizontal_sample } else { 0..1 };
521
522 for h_samp in h_step {
523 let result = if component_samples_needed {
524 // Fill the array with zeroes, decode_mcu_block expects
525 // a zero based array. Clobber is in zig-zag order though.
526 // Writing consecutive entries is basically free in terms
527 // of memory throughput so we opt for a larger power of
528 // two which lets the compiler turn this into a repeated
529 // write of a zeroed vector register, which does not have
530 // any branches, instead of a more difficult pattern where
531 // we attempt to overwrite exactly one coefficient.
532 let clobber_len = if !clobber_more_than_4x4 { 32 } else { 64 };
533
534 tmp[..clobber_len].fill(0);
535
536 stream.decode_mcu_block(
537 &mut self.stream,
538 dc_table,
539 ac_table,
540 qt_table,
541 tmp,
542 &mut component.dc_pred
543 )
544 } else {
545 // We do not touch tmp so there is no need to reset it.
546 stream.discard_mcu_block(&mut self.stream, dc_table, ac_table)
547 };
548
549 // If an error occurs we can either propagate it
550 // as an error or print it and call terminate.
551 //
552 // This allows even corrupt images to render something,
553 // even if its bad, matching browsers.
554 //
555 // See example in https://github.com/etemesi254/zune-image/issues/293
556 let len = if let Ok(len) = result {
557 len
558 } else {
559 // result.is_err()
560 return if self.options.strict_mode() {
561 Err(result.err().unwrap())
562 } else {
563 error!("{}", result.err().unwrap());
564 Ok(McuContinuation::Terminate)
565 };
566 };
567
568 if component_samples_needed {
569 // tmp was only written partially, note that len is in ZigZag order.
570 clobber_more_than_4x4 = len > 10;
571
572 let idct_position = if PROGRESSIVE {
573 // For non-interleaved, j indexes data units directly
574 j * 8
575 } else {
576 // derived from stb and rewritten for my tastes
577 let c2 = v_samp * 8;
578 let c3 = ((j * component.horizontal_sample) + h_samp) * 8;
579
580 component.width_stride * c2 + c3
581 };
582
583 let idct_pos = channel.get_mut(idct_position..).unwrap();
584
585 if len <= 1 {
586 (self.idct_1x1_func)(tmp, idct_pos, component.width_stride);
587 } else if len <= 10 {
588 (self.idct_4x4_func)(tmp, idct_pos, component.width_stride);
589 } else {
590 // call idct.
591 (self.idct_func)(tmp, idct_pos, component.width_stride);
592 }
593 }
594 }
595 }
596 }
597
598 self.todo = self.todo.wrapping_sub(1);
599
600 if self.todo == 0 {
601 self.handle_rst_main(stream)?;
602 continue;
603 }
604
605 if stream.marker.is_some() && stream.bits_left == 0 {
606 break;
607 }
608 }
609
610 self.check_stream_marker_after_mcu_width(stream)
611 }
612
613 fn check_stream_marker_after_mcu_width(
614 &mut self, stream: &mut BitStream
615 ) -> Result<McuContinuation, DecodeErrors> {
616 // After all interleaved components, that's an MCU
617 // handle stream markers
618 //
619 // In some corrupt images, it may occur that header markers occur in the stream.
620 // The spec EXPLICITLY FORBIDS this, specifically, in
621 // routine F.2.2.5 it says
622 // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
623 //
624 // But libjpeg-turbo allows it because of some weird reason. so I'll also
625 // allow it because of some weird reason.
626 if let Some(m) = stream.marker {
627 if m == Marker::EOI {
628 // acknowledge and ignore EOI marker.
629 stream.marker.take();
630 trace!("Found EOI marker");
631 // Google Introduced the Ultra-HD image format which is basically
632 // stitching two images into one container.
633 // They basically separate two images via a EOI and SOI marker
634 // so let's just ensure if we ever see EOI, we never read past that
635 // ever.
636 // https://github.com/google/libultrahdr
637 stream.seen_eoi = true;
638 } else if let Marker::RST(_) = m {
639 //debug_assert_eq!(self.todo, 0);
640 if self.todo == 0 {
641 self.handle_rst(stream)?;
642 }
643 } else if let Marker::SOS = m {
644 self.parse_marker_inner(m)?;
645 stream.marker.take();
646 stream.reset();
647 trace!("Found SOS marker");
648 return Ok(McuContinuation::AnotherSos);
649 } else if matches!(m, Marker::DHT | Marker::DQT | Marker::DRI | Marker::COM)
650 || matches!(m, Marker::APP(_))
651 {
652 // For non-interleaved images, setup markers can appear between scans.
653 // Signal the caller to handle this marker and find the next SOS.
654 // This keeps all marker parsing in the caller's loop.
655 stream.marker.take();
656 trace!("Found inter-scan marker {:?}", m);
657 return Ok(McuContinuation::InterScanMarker(m));
658 } else {
659 if self.options.strict_mode() {
660 return Err(DecodeErrors::Format(format!(
661 "Marker {m:?} found where not expected"
662 )));
663 }
664 error!(
665 "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
666 m
667 );
668
669 self.parse_marker_inner(m)?;
670 stream.marker.take();
671 stream.reset();
672 return Ok(McuContinuation::Terminate);
673 }
674 }
675
676 Ok(McuContinuation::Ok)
677 }
678
679 /// Scan for the next SOS marker, parsing setup markers along the way.
680 ///
681 /// This is the unified marker scanning function used after encountering an
682 /// inter-scan marker. It handles DHT, DQT, DRI, COM, and APP markers that
683 /// can appear between scans in non-interleaved images.
684 ///
685 /// # Arguments
686 /// * `first_marker` - The first marker that was already detected (not yet parsed)
687 /// * `stream` - The bitstream state
688 ///
689 /// # Returns
690 /// * `Ok(true)` - Found SOS, ready to continue decoding
691 /// * `Ok(false)` - Found EOI, decoding complete
692 /// * `Err(_)` - Error (too many markers, unexpected marker in strict mode, etc.)
693 fn advance_to_next_sos(
694 &mut self,
695 first_marker: Marker,
696 stream: &mut BitStream
697 ) -> Result<bool, DecodeErrors> {
698 // Limit iterations to prevent DoS from malicious files.
699 const MAX_INTER_SCAN_MARKERS: usize = 64;
700
701 // Parse the first marker that triggered this call
702 self.parse_marker_inner(first_marker)?;
703 stream.reset();
704
705 for _ in 0..MAX_INTER_SCAN_MARKERS {
706 let marker = get_marker(&mut self.stream, stream)?;
707
708 match marker {
709 Marker::SOS => {
710 self.parse_marker_inner(Marker::SOS)?;
711 stream.reset();
712 trace!("Found SOS marker, continuing decode");
713 return Ok(true);
714 }
715 Marker::EOI => {
716 stream.seen_eoi = true;
717 trace!("Found EOI marker");
718 return Ok(false);
719 }
720 Marker::DHT | Marker::DQT | Marker::DRI | Marker::COM => {
721 trace!("Parsing inter-scan marker {:?}", marker);
722 self.parse_marker_inner(marker)?;
723 }
724 Marker::APP(_) => {
725 trace!("Parsing inter-scan APP marker {:?}", marker);
726 self.parse_marker_inner(marker)?;
727 }
728 other => {
729 if self.options.strict_mode() {
730 return Err(DecodeErrors::Format(format!(
731 "Unexpected marker {:?} while scanning for SOS between scans",
732 other
733 )));
734 }
735 // Non-strict: skip unknown marker
736 warn!("Skipping unexpected marker {:?} between scans", other);
737 let length = self.stream.get_u16_be_err()?;
738 if length >= 2 {
739 self.stream.skip((length - 2) as usize)?;
740 }
741 }
742 }
743 }
744
745 Err(DecodeErrors::FormatStatic(
746 "Too many markers between scans (exceeded limit of 64)"
747 ))
748 }
749
750 // handle RST markers.
751 // No-op if not using restarts
752 // this routine is shared with mcu_prog
753 #[cold]
754 pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
755 self.todo = self.restart_interval;
756
757 if let Some(marker) = stream.marker {
758 // Found a marker
759 // Read stream and see what marker is stored there
760 match marker {
761 Marker::RST(_) => {
762 // reset stream
763 stream.reset();
764 // Initialize dc predictions to zero for all components
765 self.components.iter_mut().for_each(|x| x.dc_pred = 0);
766 // Start iterating again. from position.
767 }
768 Marker::EOI => {
769 // silent pass
770 }
771 // Valid markers that can appear between scans at a restart boundary
772 // (restart interval aligns with end of scan). Leave for caller.
773 Marker::SOS | Marker::DHT | Marker::DQT | Marker::DRI | Marker::COM
774 | Marker::APP(_) => {}
775 _ => {
776 if self.options.strict_mode() {
777 return Err(DecodeErrors::MCUError(format!(
778 "Unexpected marker {marker:?} at restart boundary"
779 )));
780 }
781 warn!("Unexpected marker {:?} at restart boundary", marker);
782 }
783 }
784 }
785 Ok(())
786 }
787 #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
788 pub(crate) fn post_process(
789 &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
790 padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
791 ) -> Result<(), DecodeErrors> {
792 let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();
793
794 let mut px = *pixels_written;
795 // indicates whether image is vertically up-sampled
796 let is_vertically_sampled = self
797 .components
798 .iter()
799 .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);
800
801 let mut comp_len = self.components.len();
802
803 // If we are moving from YCbCr -> Luma, we do not allocate storage for other components, so we
804 // will panic when we are trying to read samples, so for that case,
805 // hardcode it so that we don't panic when doing
806 // *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
807 if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
808 comp_len = out_colorspace_components;
809 }
810 let mut color_conv_function =
811 |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
812 for (pos, output) in pixels[px..]
813 .chunks_exact_mut(width * out_colorspace_components)
814 .take(num_iters)
815 .enumerate()
816 {
817 let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];
818
819 // iterate over each line, since color-convert needs only
820 // one line
821 for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
822 let temp = &samples[j].get(pos * padded_width..(pos + 1) * padded_width);
823 if temp.is_none() {
824 return Err(DecodeErrors::FormatStatic("Missing samples"));
825 }
826 *samp = temp.unwrap();
827 }
828 color_convert(
829 &raw_samples,
830 self.color_convert_16,
831 self.input_colorspace,
832 self.options.jpeg_get_out_colorspace(),
833 output,
834 width,
835 padded_width
836 )?;
837 px += width * out_colorspace_components;
838 }
839 Ok(())
840 };
841
842 let comps = &mut self.components[..];
843
844 if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
845 for comp in comps.iter_mut() {
846 upsample(
847 comp,
848 mcu_height,
849 i,
850 upsampler_scratch_space,
851 is_vertically_sampled
852 )?;
853 }
854
855 if is_vertically_sampled {
856 if i > 0 {
857 // write the last line, it wasn't up-sampled as we didn't have row_down
858 // yet
859 let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
860
861 for (samp, component) in samples.iter_mut().zip(comps.iter()) {
862 *samp = &component.first_row_upsample_dest;
863 }
864
865 // ensure length matches for all samples
866 let _first_len = samples[0].len();
867
868 // This was a good check, but can be caused to panic, esp on invalid/corrupt images.
869 // See one in issue https://github.com/etemesi254/zune-image/issues/262, so for now
870 // we just ignore and generate invalid images at the end.
871
872 //
873 //
874 // for samp in samples.iter().take(comp_len) {
875 // assert_eq!(first_len, samp.len());
876 // }
877 let num_iters = self.coeff * self.v_max;
878
879 color_conv_function(num_iters, samples)?;
880 }
881
882 // After up-sampling the last row, save any row that can be used for
883 // a later up-sampling,
884 //
885 // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
886 // the previous mcu, since we don't have the down row, so save it
887 for component in comps.iter_mut() {
888 if component.sample_ratio != SampleRatios::H {
889 // We don't care about H sampling factors, since it's copied in the workers function
890
891 // copy last row to be used for the next color conversion
892 let size = component.vertical_sample
893 * component.width_stride
894 * component.sample_ratio.sample();
895
896 let last_bytes =
897 component.raw_coeff.rchunks_exact_mut(size).next().unwrap();
898
899 component
900 .first_row_upsample_dest
901 .copy_from_slice(last_bytes);
902 }
903 }
904 }
905
906 let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];
907
908 for (samp, component) in samples.iter_mut().zip(comps.iter()) {
909 *samp = if component.sample_ratio == SampleRatios::None {
910 &component.raw_coeff
911 } else {
912 &component.upsample_dest
913 };
914 }
915
916 // we either do 7 or 8 MCU's depending on the state, this only applies to
917 // vertically sampled images
918 //
919 // for rows up until the last MCU, we do not upsample the last stride of the MCU
920 // which means that the number of iterations should take that into account is one less the
921 // up-sampled size
922 //
923 // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
924 // should sample full raw coeffs
925 let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));
926
927 let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;
928
929 color_conv_function(num_iters, samples)?;
930 } else {
931 let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];
932
933 self.components
934 .iter()
935 .enumerate()
936 .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);
937
938 if let SampleRatios::Generic(_, v) = self.info.sample_ratio {
939 color_conv_function(8 * v * self.coeff, channels_ref)?;
940 } else {
941 color_conv_function(8 * self.coeff, channels_ref)?;
942 }
943 }
944
945 *pixels_written = px;
946 Ok(())
947 }
948}
949
/// Tells the baseline decoding loop how to proceed after a row of MCUs has been decoded.
enum McuContinuation {
    /// Nothing special happened; carry on with the next row of MCUs.
    Ok,
    /// A new SOS header was found in the stream and has already been parsed.
    AnotherSos,
    /// Found an inter-scan marker (DHT/DQT/DRI/COM/APP) that needs handling.
    /// The caller should parse it and scan for the next SOS.
    InterScanMarker(Marker),
    /// Decoding cannot usefully continue: returned in non-strict mode after a block
    /// failed to decode or an unexpected marker was found in the stream.
    Terminate
}