zune_jpeg/mcu.rs
1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9use alloc::{format, vec};
10use core::cmp::min;
11use alloc::vec::Vec;
12use zune_core::bytestream::ZReaderTrait;
13use zune_core::colorspace::ColorSpace;
14use zune_core::colorspace::ColorSpace::Luma;
15use zune_core::log::{error, trace, warn};
16
17use crate::bitstream::BitStream;
18use crate::components::SampleRatios;
19use crate::decoder::MAX_COMPONENTS;
20use crate::errors::DecodeErrors;
21use crate::marker::Marker;
22use crate::mcu_prog::get_marker;
23use crate::misc::{calculate_padded_width, setup_component_params};
24use crate::worker::{color_convert, upsample};
25use crate::JpegDecoder;
26
/// Number of coefficients in a single DCT block (8 x 8 = 64).
pub const DCT_BLOCK: usize = 64;
30
31impl<T: ZReaderTrait> JpegDecoder<T> {
32 /// Check for existence of DC and AC Huffman Tables
33 pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
34 // check that dc and AC tables exist outside the hot path
35 for component in &self.components {
36 let _ = &self
37 .dc_huffman_tables
38 .get(component.dc_huff_table)
39 .as_ref()
40 .ok_or_else(|| {
41 DecodeErrors::HuffmanDecode(format!(
42 "No Huffman DC table for component {:?} ",
43 component.component_id
44 ))
45 })?
46 .as_ref()
47 .ok_or_else(|| {
48 DecodeErrors::HuffmanDecode(format!(
49 "No DC table for component {:?}",
50 component.component_id
51 ))
52 })?;
53
54 let _ = &self
55 .ac_huffman_tables
56 .get(component.ac_huff_table)
57 .as_ref()
58 .ok_or_else(|| {
59 DecodeErrors::HuffmanDecode(format!(
60 "No Huffman AC table for component {:?} ",
61 component.component_id
62 ))
63 })?
64 .as_ref()
65 .ok_or_else(|| {
66 DecodeErrors::HuffmanDecode(format!(
67 "No AC table for component {:?}",
68 component.component_id
69 ))
70 })?;
71 }
72 Ok(())
73 }
74
    /// Decode MCUs and carry out post processing.
    ///
    /// This is the main decoder loop for the library, the hot path.
    ///
    /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
    /// here.
    ///
    /// The routine decodes one row of MCUs at a time. When the first scan carries every
    /// component, each row is post processed into `pixels` right after it is decoded.
    /// Otherwise the decoded blocks are accumulated per component and turned into
    /// pixels by `finish_baseline_decoding` once the scan loop is done.
    ///
    /// # Arguments
    /// - `pixels`: Output buffer the converted pixels are written into.
    ///
    /// # Errors
    /// Propagates any error from component setup, the Huffman table check, upsampler
    /// selection, MCU decoding and post processing. In strict mode it additionally
    /// returns `DecodeErrors::FormatStatic` when the bit stream was over-read.
    #[allow(
        clippy::similar_names,
        clippy::too_many_lines,
        clippy::cast_possible_truncation
    )]
    #[inline(never)]
    pub(crate) fn decode_mcu_ycbcr_baseline(
        &mut self, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        setup_component_params(self)?;

        // check that DC and AC tables exist before entering the hot loop
        self.check_tables()?;

        let (mut mcu_width, mut mcu_height);

        if self.is_interleaved {
            // set upsampling functions
            self.set_upsampling()?;

            mcu_width = self.mcu_x;
            mcu_height = self.mcu_y;
        } else {
            // For non-interleaved images (1*1 subsampling) the number of MCUs is the
            // dimension (+7 to account for padding) divided by 8.
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        if self.is_interleaved
            && self.input_colorspace.num_components() > 1
            && self.options.jpeg_get_out_colorspace().num_components() == 1
            && (self.sub_sample_ratio == SampleRatios::V
                || self.sub_sample_ratio == SampleRatios::HV)
        {
            // For a specific set of images, e.g interleaved,
            // when converting from YCbCr to grayscale, we need to
            // take into account mcu height since the MCU decoding needs to take
            // it into account for padding purposes and the post processor
            // parses two rows per mcu width.
            //
            // set coeff to be 2 to ensure that we increment two rows
            // for every mcu processed also
            mcu_height *= self.v_max;
            mcu_height /= self.h_max;
            self.coeff = 2;
        }

        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
            warn!("Grayscale image with down-sampled component, resetting component details");

            self.reset_params();

            // after the reset, fall back to the 8x8 block counts used for non-interleaved images
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        let width = usize::from(self.info.width);

        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);

        let mut stream = BitStream::new();
        // scratch block reused for every decoded DCT block
        let mut tmp = [0_i32; DCT_BLOCK];

        let comp_len = self.components.len();

        for (pos, comp) in self.components.iter_mut().enumerate() {
            // Allocate only needed components.
            //
            // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
            // components.
            if min(
                self.options.jpeg_get_out_colorspace().num_components() - 1,
                pos
            ) == pos
                || comp_len == 4
            // Special colorspace
            {
                // allocate enough space to hold a whole MCU width
                // this means we should take into account sampling ratios
                // `*8` is because each MCU spans 8 widths.
                let len = comp.width_stride * comp.vertical_sample * 8;

                comp.needed = true;
                comp.raw_coeff = vec![0; len];
            } else {
                comp.needed = false;
            }
        }

        // If all components are contained in the first scan of MCUs, then we can process into
        // (upsampled) pixels immediately after each MCU, for convenience we use each row of MCUS.
        // Otherwise, we must first wait until following SOS provide the remaining components.
        let all_components_in_first_scan = usize::from(self.num_scans) == self.components.len();
        let mut progressive_mcus: [Vec<i16>; 4] = core::array::from_fn(|_| vec![]);

        if !all_components_in_first_scan {
            // one full-image buffer per component, filled scan after scan
            for (component, mcu) in self.components.iter().zip(&mut progressive_mcus) {
                let len = mcu_width
                    * component.vertical_sample
                    * component.horizontal_sample
                    * mcu_height
                    * 64;
                *mcu = vec![0; len];
            }
        }

        let mut pixels_written = 0;

        // the upsampler scratch buffer is empty (length zero) for non-interleaved images
        let is_hv = usize::from(self.is_interleaved);
        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];

        'sos: loop {
            trace!(
                "Baseline decoding of components: {:?}",
                &self.z_order[..usize::from(self.num_scans)]
            );
            trace!("Decoding MCU width: {mcu_width}, height: {mcu_height}");

            for i in 0..mcu_height {
                // Report if we have no more bytes.
                // The bit stream over-reads by design, so a small over-read is not an
                // error by itself; only once it exceeds 37 bytes do we treat the input
                // as truncated.
                if stream.overread_by > 37
                // favourite number :)
                {
                    if self.options.get_strict_mode() {
                        return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
                    };

                    // non-strict mode: log and stop decoding rows
                    error!("Premature end of buffer");
                    break;
                }

                // decode a whole MCU width,
                // this takes into account interleaved components.
                let terminate = if all_components_in_first_scan {
                    self.decode_mcu_width::<false>(
                        mcu_width,
                        i,
                        &mut tmp,
                        &mut stream,
                        &mut progressive_mcus,
                    )?
                } else {
                    self.decode_mcu_width::<true>(
                        mcu_width,
                        i,
                        &mut tmp,
                        &mut stream,
                        &mut progressive_mcus,
                    )?
                };

                // process that width up until it's impossible. This is faster than allocating
                // the full components, which we skipped earlier.
                if all_components_in_first_scan {
                    self.post_process(
                        pixels,
                        i,
                        mcu_height,
                        width,
                        padded_width,
                        &mut pixels_written,
                        &mut upsampler_scratch_space,
                    )?;
                }

                match terminate {
                    McuContinuation::Ok => {}
                    McuContinuation::AnotherSos if all_components_in_first_scan => {
                        warn!("More than one SOS despite already having all components");
                        return Ok(());
                    }
                    // a new scan starts over from the first MCU row
                    McuContinuation::AnotherSos => continue 'sos,
                    McuContinuation::Terminate => {
                        warn!("Got terminate signal, will not process further");
                        return Ok(());
                    }
                }
            }

            // Breaks if we get here, looping only if we have restarted, i.e. found another SOS and
            // continued at `'sos'.
            break;
        }

        if !all_components_in_first_scan {
            self.finish_baseline_decoding(&progressive_mcus, mcu_width, pixels)?;
        }

        // It may happen that some images don't fill the whole buffer, so we deliberately
        // do not assert that `pixels_written == pixels.len()`.

        // For UHD usecases that tie two images separating them with EOI and
        // SOI markers, it may happen that we do not reach this image's end of image
        // marker, so make an attempt to read it here.
        if !stream.seen_eoi {
            let marker = get_marker(&mut self.stream, &mut stream);
            match marker {
                Ok(_m) => {
                    trace!("Found marker {:?}", _m);
                }
                Err(_) => {
                    // ignore error, reading the trailing marker is best effort
                }
            }
        }

        trace!("Finished decoding image");

        Ok(())
    }
295
296 /// Process all MCUs when baseline decoding has been processing them component-after-component.
297 /// For simplicity this assembles the dequantized blocks in the order that the post processing
298 /// of an interleaved baseline decoding would use.
299 #[allow(clippy::too_many_lines)]
300 #[allow(clippy::cast_sign_loss)]
301 pub(crate) fn finish_baseline_decoding(
302 &mut self, block: &[Vec<i16>; MAX_COMPONENTS], _mcu_width: usize, pixels: &mut [u8],
303 ) -> Result<(), DecodeErrors> {
304 let mcu_height = self.mcu_y;
305
306 // Size of our output image(width*height)
307 let is_hv = usize::from(self.is_interleaved);
308 let upsampler_scratch_size = is_hv * self.components[0].width_stride;
309 let width = usize::from(self.info.width);
310 let padded_width = calculate_padded_width(width, self.sub_sample_ratio);
311
312 let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
313
314 for (pos, comp) in self.components.iter_mut().enumerate() {
315 // Mark only needed components for computing output colors.
316 if min(
317 self.options.jpeg_get_out_colorspace().num_components() - 1,
318 pos,
319 ) == pos
320 || self.input_colorspace == ColorSpace::YCCK
321 || self.input_colorspace == ColorSpace::CMYK
322 {
323 comp.needed = true;
324 } else {
325 comp.needed = false;
326 }
327 }
328
329 let mut pixels_written = 0;
330
331 // dequantize and idct have been performed, only color convert.
332 for i in 0..mcu_height {
333 // All the data is already in the right order, we just need to be able to pass it to
334 // the post_process & upsample method. That expects all the data to be stored as one
335 // row of MCUs in each component's `raw_coeff`.
336 'component: for (position, component) in &mut self.components.iter_mut().enumerate() {
337 if !component.needed {
338 continue 'component;
339 }
340
341 // step is the number of pixels this iteration wil be handling
342 // Given by the number of mcu's height and the length of the component block
343 // Since the component block contains the whole channel as raw pixels
344 // we this evenly divides the pixels into MCU blocks
345 //
346 // For interleaved images, this gives us the exact pixels comprising a whole MCU
347 // block
348 let step = block[position].len() / mcu_height;
349
350 // where we will be reading our pixels from.
351 let slice = &block[position][i * step..][..step];
352 let temp_channel = &mut component.raw_coeff;
353 temp_channel[..step].copy_from_slice(slice);
354 }
355
356 // process that whole stripe of MCUs
357 self.post_process(
358 pixels,
359 i,
360 mcu_height,
361 width,
362 padded_width,
363 &mut pixels_written,
364 &mut upsampler_scratch_space,
365 )?;
366 }
367
368 return Ok(());
369 }
370
    /// Decode one row of MCUs and handle any marker encountered while doing so.
    ///
    /// For every MCU in the row and every component of the current scan, the blocks
    /// of that component are Huffman decoded and, for needed components, passed
    /// through the IDCT into the destination buffer.
    ///
    /// # Arguments
    /// - `mcu_width`: Number of MCUs in the row.
    /// - `mcu_height`: Despite the name, the caller passes the index of the MCU row
    ///   being decoded; it is only used to compute the write offset when `PROGRESSIVE` is true.
    /// - `tmp`: Scratch block, zeroed before each block is decoded.
    /// - `stream`: Bit stream state for the entropy coded data.
    /// - `progressive`: Per-component buffers written to when `PROGRESSIVE` is true;
    ///   otherwise samples go to each component's `raw_coeff`.
    ///
    /// # Returns
    /// A [`McuContinuation`] telling the caller whether to carry on, restart for a
    /// new scan, or stop decoding.
    ///
    /// # Errors
    /// Propagates errors from block decoding, restart handling and marker parsing.
    /// In strict mode an unexpected marker yields `DecodeErrors::Format`.
    ///
    /// # Panics
    /// Unwraps the Huffman tables, so `check_tables` is expected to have succeeded
    /// beforehand. Also panics if a computed offset lies outside the destination buffer.
    fn decode_mcu_width<const PROGRESSIVE: bool>(
        &mut self, mcu_width: usize, mcu_height: usize, tmp: &mut [i32; 64],
        stream: &mut BitStream, progressive: &mut [Vec<i16>; 4],
    ) -> Result<McuContinuation, DecodeErrors> {
        // copied out so the component loop below does not keep `self` borrowed
        let z_order = self.z_order;

        for j in 0..mcu_width {
            // iterate over the components of the current scan
            for &k in &z_order[..usize::from(self.num_scans)] {
                let component = &mut self.components[k];

                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let qt_table = &component.quantization_table;
                let channel = if PROGRESSIVE {
                    // skip the rows of MCUs decoded before this one
                    let offset =
                        mcu_height * component.width_stride * 8 * component.vertical_sample;
                    &mut progressive[k][offset..]
                } else {
                    &mut component.raw_coeff
                };

                // If image is interleaved iterate over scan components,
                // otherwise if it's non-interleaved, these routines iterate in
                // trivial scanline order (Y,Cb,Cr)
                for v_samp in 0..component.vertical_sample {
                    for h_samp in 0..component.horizontal_sample {
                        // Fill the array with zeroes, decode_mcu_block expects
                        // a zeroed array.
                        tmp.fill(0);

                        stream.decode_mcu_block(
                            &mut self.stream,
                            dc_table,
                            ac_table,
                            qt_table,
                            tmp,
                            &mut component.dc_pred,
                        )?;

                        // blocks of components that are not needed are decoded
                        // above but never inverse-transformed
                        if component.needed {
                            let idct_position = {
                                // derived from stb and rewritten for my tastes
                                // c2: row offset of the block inside the MCU
                                let c2 = v_samp * 8;
                                // c3: column offset of the block inside the MCU row
                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;

                                component.width_stride * c2 + c3
                            };

                            let idct_pos = channel.get_mut(idct_position..).unwrap();
                            // call idct.
                            (self.idct_func)(tmp, idct_pos, component.width_stride);
                        }
                    }
                }
            }

            // one more MCU done; `todo` is reloaded from the restart interval in `handle_rst`
            self.todo = self.todo.wrapping_sub(1);

            if self.todo == 0 {
                self.handle_rst_main(stream)?;
                continue;
            }

            // a marker is pending and every buffered bit was consumed: stop decoding this row
            if stream.marker.is_some() && stream.bits_left == 0 {
                break;
            }
        }

        // After all interleaved components, that's an MCU
        // handle stream markers
        //
        // In some corrupt images, it may occur that header markers occur in the stream.
        // The spec EXPLICITLY FORBIDS this, specifically, in
        // routine F.2.2.5 it says
        // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
        //
        // But libjpeg-turbo allows it because of some weird reason. so I'll also
        // allow it because of some weird reason.
        if let Some(m) = stream.marker {
            if m == Marker::EOI {
                // acknowledge and ignore EOI marker.
                stream.marker.take();
                trace!("Found EOI marker");
                // Google Introduced the Ultra-HD image format which is basically
                // stitching two images into one container.
                // They basically separate two images via a EOI and SOI marker
                // so let's just ensure if we ever see EOI, we never read past that
                // ever.
                // https://github.com/google/libultrahdr
                stream.seen_eoi = true;
            } else if let Marker::RST(_) = m {
                // only act on the restart marker when the restart counter has run out
                //debug_assert_eq!(self.todo, 0);
                if self.todo == 0 {
                    self.handle_rst(stream)?;
                }
            } else if let Marker::SOS = m {
                // a new scan begins: parse its header and tell the caller to restart
                self.parse_marker_inner(m)?;
                stream.marker.take();
                stream.reset();
                trace!("Found SOS marker");
                return Ok(McuContinuation::AnotherSos);
            } else {
                if self.options.get_strict_mode() {
                    return Err(DecodeErrors::Format(format!(
                        "Marker {m:?} found where not expected"
                    )));
                }
                error!(
                    "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
                    m
                );

                // non-strict mode: parse the marker anyway, then stop decoding
                self.parse_marker_inner(m)?;
                stream.marker.take();
                stream.reset();
                return Ok(McuContinuation::Terminate);
            }
        }

        Ok(McuContinuation::Ok)
    }
499 // handle RST markers.
500 // No-op if not using restarts
501 // this routine is shared with mcu_prog
502 #[cold]
503 pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
504 self.todo = self.restart_interval;
505
506 if let Some(marker) = stream.marker {
507 // Found a marker
508 // Read stream and see what marker is stored there
509 match marker {
510 Marker::RST(_) => {
511 // reset stream
512 stream.reset();
513 // Initialize dc predictions to zero for all components
514 self.components.iter_mut().for_each(|x| x.dc_pred = 0);
515 // Start iterating again. from position.
516 }
517 Marker::EOI => {
518 // silent pass
519 }
520 _ => {
521 return Err(DecodeErrors::MCUError(format!(
522 "Marker {marker:?} found in bitstream, possibly corrupt jpeg"
523 )));
524 }
525 }
526 }
527 Ok(())
528 }
    /// Convert the samples of one decoded MCU row into output pixels.
    ///
    /// For interleaved images whose output is not Luma, every component is first
    /// passed to the upsampler; in all cases the resulting rows are then color
    /// converted into `pixels`.
    ///
    /// # Arguments
    /// - `pixels`: Output buffer; rows are written starting at `*pixels_written`.
    /// - `i`: Index of the MCU row being processed.
    /// - `mcu_height`: Total number of MCU rows, used to detect the last row.
    /// - `width`: Image width in pixels; one output row is `width * output components` bytes.
    /// - `padded_width`: Length of one row of samples inside a component buffer.
    /// - `pixels_written`: Running offset into `pixels`, advanced by the bytes written here.
    /// - `upsampler_scratch_space`: Scratch buffer handed to the upsampler.
    ///
    /// # Errors
    /// Propagates any error returned by `color_convert`.
    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
    pub(crate) fn post_process(
        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
    ) -> Result<(), DecodeErrors> {
        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();

        // local copy of the write offset, stored back on success
        let mut px = *pixels_written;
        // indicates whether image is vertically up-sampled
        let is_vertically_sampled = self
            .components
            .iter()
            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);

        let mut comp_len = self.components.len();

        // If we are moving from YCbCr -> Luma, we do not allocate storage for other components, so we
        // will panic when we are trying to read samples, so for that case,
        // hardcode it so that we don't panic when doing
        //   *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
            comp_len = out_colorspace_components;
        }
        // Color converts up to `num_iters` rows taken from `samples` into `pixels`,
        // advancing `px` by one output row per converted line.
        let mut color_conv_function =
            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
                for (pos, output) in pixels[px..]
                    .chunks_exact_mut(width * out_colorspace_components)
                    .take(num_iters)
                    .enumerate()
                {
                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    // iterate over each line, since color-convert needs only
                    // one line
                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width];
                    }
                    color_convert(
                        &raw_samples,
                        self.color_convert_16,
                        self.input_colorspace,
                        self.options.jpeg_get_out_colorspace(),
                        output,
                        width,
                        padded_width
                    )?;
                    px += width * out_colorspace_components;
                }
                Ok(())
            };

        let comps = &mut self.components[..];

        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
            {
                // duplicated so that we can check that samples match
                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
                // NOTE(review): `samples` is filled here but not read before the scope
                // ends, so this block looks like it has no effect; confirm before removing.
                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                    *samp = if component.sample_ratio == SampleRatios::None {
                        &component.raw_coeff
                    } else {
                        &component.upsample_dest
                    };
                }
            }
            for comp in comps.iter_mut() {
                upsample(
                    comp,
                    mcu_height,
                    i,
                    upsampler_scratch_space,
                    is_vertically_sampled
                );
            }

            if is_vertically_sampled {
                if i > 0 {
                    // write the last line, it wasn't up-sampled as we didn't have row_down
                    // yet
                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                        *samp = &component.first_row_upsample_dest;
                    }

                    // Length of the first sample buffer. It is currently unused: checking
                    // that every sample buffer has this length could be made to panic on
                    // invalid/corrupt images, see
                    // https://github.com/etemesi254/zune-image/issues/262, so for now
                    // we skip the check and may generate invalid images instead.
                    let _first_len = samples[0].len();

                    let num_iters = self.coeff * self.v_max;

                    color_conv_function(num_iters, samples)?;
                }

                // After up-sampling the last row, save any row that can be used for
                // a later up-sampling,
                //
                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
                // the previous mcu, since we don't have the down row, so save it
                for component in comps.iter_mut() {
                    if component.sample_ratio != SampleRatios::H {
                        // We don't care about H sampling factors, since it's copied in the workers function

                        // copy last row to be used for the next color conversion
                        let size = component.vertical_sample
                            * component.width_stride
                            * component.sample_ratio.sample();

                        // the trailing `size` samples of the raw buffer
                        let last_bytes =
                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();

                        component
                            .first_row_upsample_dest
                            .copy_from_slice(last_bytes);
                    }
                }
            }

            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

            // components without subsampling are read straight from their raw buffer,
            // the others from the upsampler's destination buffer
            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                *samp = if component.sample_ratio == SampleRatios::None {
                    &component.raw_coeff
                } else {
                    &component.upsample_dest
                };
            }

            // we either do 7 or 8 rows depending on the state, this only applies to
            // vertically sampled images
            //
            // for rows up until the last MCU, we do not upsample the last stride of the MCU
            // which means that the number of iterations should take that into account and is
            // one less than the up-sampled size
            //
            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
            // should sample full raw coeffs
            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));

            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;

            color_conv_function(num_iters, samples)?;
        } else {
            // no upsampling needed: color convert straight from the raw buffers
            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];

            self.components
                .iter()
                .enumerate()
                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);

            if let SampleRatios::Generic(_, v) = self.sub_sample_ratio {
                color_conv_function(8 * v * self.coeff, channels_ref)?;
            } else {
                color_conv_function(8 * self.coeff, channels_ref)?;
            }
        }

        *pixels_written = px;
        Ok(())
    }
699}
700
/// Outcome of decoding one row of MCUs, telling the baseline decoding loop how to proceed.
enum McuContinuation {
    /// No marker that ends or restarts the scan was handled; continue with the next row.
    Ok,
    /// An SOS marker was found and parsed; a further scan follows.
    AnotherSos,
    /// An unexpected marker was found and parsed in non-strict mode; stop decoding.
    Terminate,
}