zune_jpeg/
headers.rs

1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
//! Decode JPEG markers/segments
10//!
11//! This file deals with decoding header information in a jpeg file
12//!
13use alloc::format;
14use alloc::string::ToString;
15use alloc::vec::Vec;
16
17use zune_core::bytestream::ZReaderTrait;
18use zune_core::colorspace::ColorSpace;
19use zune_core::log::{debug, error, trace, warn};
20
21use crate::components::Components;
22use crate::decoder::{ICCChunk, JpegDecoder, MAX_COMPONENTS};
23use crate::errors::DecodeErrors;
24use crate::huffman::HuffmanTable;
25use crate::misc::{SOFMarkers, UN_ZIGZAG};
26
27///**B.2.4.2 Huffman table-specification syntax**
28#[allow(clippy::similar_names, clippy::cast_sign_loss)]
29pub(crate) fn parse_huffman<T: ZReaderTrait>(
30    decoder: &mut JpegDecoder<T>
31) -> Result<(), DecodeErrors>
32where
33{
34    // Read the length of the Huffman table
35    let mut dht_length = i32::from(decoder.stream.get_u16_be_err()?.checked_sub(2).ok_or(
36        DecodeErrors::FormatStatic("Invalid Huffman length in image")
37    )?);
38
39    while dht_length > 16 {
40        // HT information
41        let ht_info = decoder.stream.get_u8_err()?;
42        // third bit indicates whether the huffman encoding is DC or AC type
43        let dc_or_ac = (ht_info >> 4) & 0xF;
44        // Indicate the position of this table, should be less than 4;
45        let index = (ht_info & 0xF) as usize;
46        // read the number of symbols
47        let mut num_symbols: [u8; 17] = [0; 17];
48
49        if index >= MAX_COMPONENTS {
50            return Err(DecodeErrors::HuffmanDecode(format!(
51                "Invalid DHT index {index}, expected between 0 and 3"
52            )));
53        }
54
55        if dc_or_ac > 1 {
56            return Err(DecodeErrors::HuffmanDecode(format!(
57                "Invalid DHT position {dc_or_ac}, should be 0 or 1"
58            )));
59        }
60
61        decoder
62            .stream
63            .read_exact(&mut num_symbols[1..17])
64            .map_err(|_| DecodeErrors::ExhaustedData)?;
65
66        dht_length -= 1 + 16;
67
68        let symbols_sum: i32 = num_symbols.iter().map(|f| i32::from(*f)).sum();
69
70        // The sum of the number of symbols cannot be greater than 256;
71        if symbols_sum > 256 {
72            return Err(DecodeErrors::FormatStatic(
73                "Encountered Huffman table with excessive length in DHT"
74            ));
75        }
76        if symbols_sum > dht_length {
77            return Err(DecodeErrors::HuffmanDecode(format!(
78                "Excessive Huffman table of length {symbols_sum} found when header length is {dht_length}"
79            )));
80        }
81        dht_length -= symbols_sum;
82        // A table containing symbols in increasing code length
83        let mut symbols = [0; 256];
84
85        decoder
86            .stream
87            .read_exact(&mut symbols[0..(symbols_sum as usize)])
88            .map_err(|x| {
89                DecodeErrors::Format(format!("Could not read symbols into the buffer\n{x}"))
90            })?;
91        // store
92        match dc_or_ac {
93            0 => {
94                decoder.dc_huffman_tables[index] = Some(HuffmanTable::new(
95                    &num_symbols,
96                    symbols,
97                    true,
98                    decoder.is_progressive
99                )?);
100            }
101            _ => {
102                decoder.ac_huffman_tables[index] = Some(HuffmanTable::new(
103                    &num_symbols,
104                    symbols,
105                    false,
106                    decoder.is_progressive
107                )?);
108            }
109        }
110    }
111
112    if dht_length > 0 {
113        return Err(DecodeErrors::FormatStatic("Bogus Huffman table definition"));
114    }
115
116    Ok(())
117}
118
119///**B.2.4.1 Quantization table-specification syntax**
120#[allow(clippy::cast_possible_truncation, clippy::needless_range_loop)]
121pub(crate) fn parse_dqt<T: ZReaderTrait>(img: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> {
122    // read length
123    let mut qt_length =
124        img.stream
125            .get_u16_be_err()?
126            .checked_sub(2)
127            .ok_or(DecodeErrors::FormatStatic(
128                "Invalid DQT length. Length should be greater than 2"
129            ))?;
130    // A single DQT header may have multiple QT's
131    while qt_length > 0 {
132        let qt_info = img.stream.get_u8_err()?;
133        // 0 = 8 bit otherwise 16 bit dqt
134        let precision = (qt_info >> 4) as usize;
135        // last 4 bits give us position
136        let table_position = (qt_info & 0x0f) as usize;
137        let precision_value = 64 * (precision + 1);
138
139        if (precision_value + 1) as u16 > qt_length {
140            return Err(DecodeErrors::DqtError(format!("Invalid QT table bytes left :{}. Too small to construct a valid qt table which should be {} long", qt_length, precision_value + 1)));
141        }
142
143        let dct_table = match precision {
144            0 => {
145                let mut qt_values = [0; 64];
146
147                img.stream.read_exact(&mut qt_values).map_err(|x| {
148                    DecodeErrors::Format(format!("Could not read symbols into the buffer\n{x}"))
149                })?;
150                qt_length -= (precision_value as u16) + 1 /*QT BIT*/;
151                // carry out un zig-zag here
152                un_zig_zag(&qt_values)
153            }
154            1 => {
155                // 16 bit quantization tables
156                let mut qt_values = [0_u16; 64];
157
158                for i in 0..64 {
159                    qt_values[i] = img.stream.get_u16_be_err()?;
160                }
161                qt_length -= (precision_value as u16) + 1;
162
163                un_zig_zag(&qt_values)
164            }
165            _ => {
166                return Err(DecodeErrors::DqtError(format!(
167                    "Expected QT precision value of either 0 or 1, found {precision:?}"
168                )));
169            }
170        };
171
172        if table_position >= MAX_COMPONENTS {
173            return Err(DecodeErrors::DqtError(format!(
174                "Too large table position for QT :{table_position}, expected between 0 and 3"
175            )));
176        }
177
178        img.qt_tables[table_position] = Some(dct_table);
179    }
180
181    return Ok(());
182}
183
184/// Section:`B.2.2 Frame header syntax`
185
186pub(crate) fn parse_start_of_frame<T: ZReaderTrait>(
187    sof: SOFMarkers, img: &mut JpegDecoder<T>
188) -> Result<(), DecodeErrors> {
189    if img.seen_sof {
190        return Err(DecodeErrors::SofError(
191            "Two Start of Frame Markers".to_string()
192        ));
193    }
194    // Get length of the frame header
195    let length = img.stream.get_u16_be_err()?;
196    // usually 8, but can be 12 and 16, we currently support only 8
197    // so sorry about that 12 bit images
198    let dt_precision = img.stream.get_u8_err()?;
199
200    if dt_precision != 8 {
201        return Err(DecodeErrors::SofError(format!(
202            "The library can only parse 8-bit images, the image has {dt_precision} bits of precision"
203        )));
204    }
205
206    img.info.set_density(dt_precision);
207
208    // read  and set the image height.
209    let img_height = img.stream.get_u16_be_err()?;
210    img.info.set_height(img_height);
211
212    // read and set the image width
213    let img_width = img.stream.get_u16_be_err()?;
214    img.info.set_width(img_width);
215
216    trace!("Image width  :{}", img_width);
217    trace!("Image height :{}", img_height);
218
219    if usize::from(img_width) > img.options.get_max_width() {
220        return Err(DecodeErrors::Format(format!("Image width {} greater than width limit {}. If use `set_limits` if you want to support huge images", img_width, img.options.get_max_width())));
221    }
222
223    if usize::from(img_height) > img.options.get_max_height() {
224        return Err(DecodeErrors::Format(format!("Image height {} greater than height limit {}. If use `set_limits` if you want to support huge images", img_height, img.options.get_max_height())));
225    }
226
227    // Check image width or height is zero
228    if img_width == 0 || img_height == 0 {
229        return Err(DecodeErrors::ZeroError);
230    }
231
232    // Number of components for the image.
233    let num_components = img.stream.get_u8_err()?;
234
235    if num_components == 0 {
236        return Err(DecodeErrors::SofError(
237            "Number of components cannot be zero.".to_string()
238        ));
239    }
240
241    let expected = 8 + 3 * u16::from(num_components);
242    // length should be equal to num components
243    if length != expected {
244        return Err(DecodeErrors::SofError(format!(
245            "Length of start of frame differs from expected {expected},value is {length}"
246        )));
247    }
248
249    trace!("Image components : {}", num_components);
250
251    if num_components == 1 {
252        // SOF sets the number of image components
253        // and that to us translates to setting input and output
254        // colorspaces to zero
255        img.input_colorspace = ColorSpace::Luma;
256        img.options = img.options.jpeg_set_out_colorspace(ColorSpace::Luma);
257        debug!("Overriding default colorspace set to Luma");
258    }
259    if num_components == 4 && img.input_colorspace == ColorSpace::YCbCr {
260        trace!("Input image has 4 components, defaulting to CMYK colorspace");
261        // https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/
262        img.input_colorspace = ColorSpace::CMYK;
263    }
264
265    // set number of components
266    img.info.components = num_components;
267
268    let mut components = Vec::with_capacity(num_components as usize);
269    let mut temp = [0; 3];
270
271    for pos in 0..num_components {
272        // read 3 bytes for each component
273        img.stream
274            .read_exact(&mut temp)
275            .map_err(|x| DecodeErrors::Format(format!("Could not read component data\n{x}")))?;
276        // create a component.
277        let component = Components::from(temp, pos)?;
278
279        components.push(component);
280    }
281    img.seen_sof = true;
282
283    img.info.set_sof_marker(sof);
284
285    img.components = components;
286
287    Ok(())
288}
289
290/// Parse a start of scan data
291pub(crate) fn parse_sos<T: ZReaderTrait>(image: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> {
292    // Scan header length
293    let ls = image.stream.get_u16_be_err()?;
294    // Number of image components in scan
295    let ns = image.stream.get_u8_err()?;
296
297    let mut seen = [-1; { MAX_COMPONENTS + 1 }];
298
299    image.num_scans = ns;
300
301    if ls != 6 + 2 * u16::from(ns) {
302        return Err(DecodeErrors::SosError(format!(
303            "Bad SOS length {ls},corrupt jpeg"
304        )));
305    }
306
307    // Check number of components.
308    if !(1..5).contains(&ns) {
309        return Err(DecodeErrors::SosError(format!(
310            "Number of components in start of scan should be less than 3 but more than 0. Found {ns}"
311        )));
312    }
313
314    if image.info.components == 0 {
315        return Err(DecodeErrors::FormatStatic(
316            "Error decoding SOF Marker, Number of components cannot be zero."
317        ));
318    }
319
320    // consume spec parameters
321    for i in 0..ns {
322        // CS_i parameter, I don't need it so I might as well delete it
323        let id = image.stream.get_u8_err()?;
324
325        if seen.contains(&i32::from(id)) {
326            return Err(DecodeErrors::SofError(format!(
327                "Duplicate ID {id} seen twice in the same component"
328            )));
329        }
330
331        seen[usize::from(i)] = i32::from(id);
332        // DC and AC huffman table position
333        // top 4 bits contain dc huffman destination table
334        // lower four bits contain ac huffman destination table
335        let y = image.stream.get_u8_err()?;
336
337        let mut j = 0;
338
339        while j < image.info.components {
340            if image.components[j as usize].id == id {
341                break;
342            }
343
344            j += 1;
345        }
346
347        if j == image.info.components {
348            return Err(DecodeErrors::SofError(format!(
349                "Invalid component id {}, expected a value between 0 and {}",
350                id,
351                image.components.len()
352            )));
353        }
354
355        image.components[usize::from(j)].dc_huff_table = usize::from((y >> 4) & 0xF);
356        image.components[usize::from(j)].ac_huff_table = usize::from(y & 0xF);
357        image.z_order[i as usize] = j as usize;
358    }
359
360    // Collect the component spec parameters
361    // This is only needed for progressive images but I'll read
362    // them in order to ensure they are correct according to the spec
363
364    // Extract progressive information
365
366    // https://www.w3.org/Graphics/JPEG/itu-t81.pdf
367    // Page 42
368
369    // Start of spectral / predictor selection. (between 0 and 63)
370    image.spec_start = image.stream.get_u8_err()?;
371    // End of spectral selection
372    image.spec_end = image.stream.get_u8_err()?;
373
374    let bit_approx = image.stream.get_u8_err()?;
375    // successive approximation bit position high
376    image.succ_high = bit_approx >> 4;
377
378    if image.spec_end > 63 {
379        return Err(DecodeErrors::SosError(format!(
380            "Invalid Se parameter {}, range should be 0-63",
381            image.spec_end
382        )));
383    }
384    if image.spec_start > 63 {
385        return Err(DecodeErrors::SosError(format!(
386            "Invalid Ss parameter {}, range should be 0-63",
387            image.spec_start
388        )));
389    }
390    if image.succ_high > 13 {
391        return Err(DecodeErrors::SosError(format!(
392            "Invalid Ah parameter {}, range should be 0-13",
393            image.succ_low
394        )));
395    }
396    // successive approximation bit position low
397    image.succ_low = bit_approx & 0xF;
398
399    if image.succ_low > 13 {
400        return Err(DecodeErrors::SosError(format!(
401            "Invalid Al parameter {}, range should be 0-13",
402            image.succ_low
403        )));
404    }
405
406    trace!(
407        "Ss={}, Se={} Ah={} Al={}",
408        image.spec_start,
409        image.spec_end,
410        image.succ_high,
411        image.succ_low
412    );
413
414    Ok(())
415}
416
417/// Parse the APP13 (IPTC) segment.
418pub(crate) fn parse_app13<T: ZReaderTrait>(
419    decoder: &mut JpegDecoder<T>,
420) -> Result<(), DecodeErrors> {
421    const IPTC_PREFIX: &[u8] = b"Photoshop 3.0";
422    // skip length.
423    let mut length = usize::from(decoder.stream.get_u16_be());
424
425    if length < 2 {
426        return Err(DecodeErrors::FormatStatic("Too small APP13 length"));
427    }
428    // length bytes.
429    length -= 2;
430
431    if length > IPTC_PREFIX.len() && decoder.stream.peek_at(0, IPTC_PREFIX.len())? == IPTC_PREFIX {
432        // skip bytes we read above.
433        decoder.stream.skip(IPTC_PREFIX.len());
434        length -= IPTC_PREFIX.len();
435
436        let iptc_bytes = decoder.stream.peek_at(0, length)?.to_vec();
437
438        decoder.info.iptc_data = Some(iptc_bytes);
439    }
440
441    decoder.stream.skip(length);
442    Ok(())
443}
444
445/// Parse Adobe App14 segment
446pub(crate) fn parse_app14<T: ZReaderTrait>(
447    decoder: &mut JpegDecoder<T>
448) -> Result<(), DecodeErrors> {
449    // skip length
450    let mut length = usize::from(decoder.stream.get_u16_be());
451
452    if length < 2 || !decoder.stream.has(length - 2) {
453        return Err(DecodeErrors::ExhaustedData);
454    }
455    if length < 14 {
456        return Err(DecodeErrors::FormatStatic(
457            "Too short of a length for App14 segment"
458        ));
459    }
460    if decoder.stream.peek_at(0, 5) == Ok(b"Adobe") {
461        // move stream 6 bytes to remove adobe id
462        decoder.stream.skip(6);
463        // skip version, flags0 and flags1
464        decoder.stream.skip(5);
465        // get color transform
466        let transform = decoder.stream.get_u8();
467        // https://exiftool.org/TagNames/JPEG.html#Adobe
468        match transform {
469            0 => decoder.input_colorspace = ColorSpace::CMYK,
470            1 => decoder.input_colorspace = ColorSpace::YCbCr,
471            2 => decoder.input_colorspace = ColorSpace::YCCK,
472            _ => {
473                return Err(DecodeErrors::Format(format!(
474                    "Unknown Adobe colorspace {transform}"
475                )))
476            }
477        }
478        // length   = 2
479        // adobe id = 6
480        // version =  5
481        // transform = 1
482        length = length.saturating_sub(14);
483    } else if decoder.options.get_strict_mode() {
484        return Err(DecodeErrors::FormatStatic("Corrupt Adobe App14 segment"));
485    } else {
486        length = length.saturating_sub(2);
487        error!("Not a valid Adobe APP14 Segment");
488    }
489    // skip any proceeding lengths.
490    // we do not need them
491    decoder.stream.skip(length);
492
493    Ok(())
494}
495
496/// Parse the APP1 segment
497///
498/// This contains the exif tag
499pub(crate) fn parse_app1<T: ZReaderTrait>(
500    decoder: &mut JpegDecoder<T>
501) -> Result<(), DecodeErrors> {
502    // contains exif data
503    let mut length = usize::from(decoder.stream.get_u16_be());
504
505    if length < 2 || !decoder.stream.has(length - 2) {
506        return Err(DecodeErrors::ExhaustedData);
507    }
508    // length bytes
509    length -= 2;
510
511    if length > 6 && decoder.stream.peek_at(0, 6).unwrap() == b"Exif\x00\x00" {
512        trace!("Exif segment present");
513        // skip bytes we read above
514        decoder.stream.skip(6);
515        length -= 6;
516
517        let exif_bytes = decoder.stream.peek_at(0, length).unwrap().to_vec();
518
519        decoder.exif_data = Some(exif_bytes);
520    } else {
521        warn!("Wrongly formatted exif tag");
522    }
523
524    decoder.stream.skip(length);
525    Ok(())
526}
527
528pub(crate) fn parse_app2<T: ZReaderTrait>(
529    decoder: &mut JpegDecoder<T>
530) -> Result<(), DecodeErrors> {
531    let mut length = usize::from(decoder.stream.get_u16_be());
532
533    if length < 2 || !decoder.stream.has(length - 2) {
534        return Err(DecodeErrors::ExhaustedData);
535    }
536    // length bytes
537    length -= 2;
538
539    if length > 14 && decoder.stream.peek_at(0, 12).unwrap() == *b"ICC_PROFILE\0" {
540        trace!("ICC Profile present");
541        // skip 12 bytes which indicate ICC profile
542        length -= 12;
543        decoder.stream.skip(12);
544        let seq_no = decoder.stream.get_u8();
545        let num_markers = decoder.stream.get_u8();
546        // deduct the two bytes we read above
547        length -= 2;
548
549        let data = decoder.stream.peek_at(0, length).unwrap().to_vec();
550
551        let icc_chunk = ICCChunk {
552            seq_no,
553            num_markers,
554            data
555        };
556        decoder.icc_data.push(icc_chunk);
557    }
558
559    decoder.stream.skip(length);
560
561    Ok(())
562}
563
564/// Small utility function to print Un-zig-zagged quantization tables
565
566fn un_zig_zag<T>(a: &[T]) -> [i32; 64]
567where
568    T: Default + Copy,
569    i32: core::convert::From<T>
570{
571    let mut output = [i32::default(); 64];
572
573    for i in 0..64 {
574        output[UN_ZIGZAG[i]] = i32::from(a[i]);
575    }
576
577    output
578}