exr/compression/piz/
huffman.rs

1//! 16-bit Huffman compression and decompression.
2//! Huffman compression and decompression routines written
3//!	by Christian Rouet for his PIZ image file format.
4// see https://github.com/AcademySoftwareFoundation/openexr/blob/88246d991e0318c043e6f584f7493da08a31f9f8/OpenEXR/IlmImf/ImfHuf.cpp
5
6use crate::math::RoundingMode;
7use crate::error::{Error, Result, UnitResult, u64_to_usize, u32_to_usize};
8use crate::io::Data;
9use std::{
10    cmp::Ordering,
11    collections::BinaryHeap,
12    io::{Cursor, Read, Write},
13};
14use std::convert::TryFrom;
15use smallvec::SmallVec;
16
17
18pub fn decompress(compressed: &[u8], expected_size: usize) -> Result<Vec<u16>> {
19    let mut remaining_compressed = compressed;
20
21    let min_code_index = usize::try_from(u32::read(&mut remaining_compressed)?)?;
22    let max_code_index_32 = u32::read(&mut remaining_compressed)?;
23    let _table_size = usize::try_from(u32::read(&mut remaining_compressed)?)?; // TODO check this and return Err?
24    let bit_count = usize::try_from(u32::read(&mut remaining_compressed)?)?;
25    let _skipped = u32::read(&mut remaining_compressed)?; // what is this
26
27    let max_code_index = usize::try_from(max_code_index_32).unwrap();
28    if min_code_index >= ENCODING_TABLE_SIZE || max_code_index >= ENCODING_TABLE_SIZE {
29        return Err(Error::invalid(INVALID_TABLE_SIZE));
30    }
31
32    if RoundingMode::Up.divide(bit_count, 8) > remaining_compressed.len() {
33        return Err(Error::invalid(NOT_ENOUGH_DATA));
34    }
35
36    let encoding_table = read_encoding_table(&mut remaining_compressed, min_code_index, max_code_index)?;
37    if bit_count > 8 * remaining_compressed.len() { return Err(Error::invalid(INVALID_BIT_COUNT)); }
38
39    let decoding_table = build_decoding_table(&encoding_table, min_code_index, max_code_index)?;
40
41    let result = decode_with_tables(
42        &encoding_table,
43        &decoding_table,
44        &remaining_compressed,
45        i32::try_from(bit_count)?,
46        max_code_index_32,
47        expected_size,
48    )?;
49
50    Ok(result)
51}
52
53pub fn compress(uncompressed: &[u16]) -> Result<Vec<u8>> {
54    if uncompressed.is_empty() { return Ok(vec![]); }
55
56    let mut frequencies = count_frequencies(uncompressed);
57    let (min_code_index, max_code_index) = build_encoding_table(&mut frequencies);
58
59    let mut result = Cursor::new(Vec::with_capacity(uncompressed.len()));
60    u32::write_slice(&mut result, &[0; 5])?; // we come back to these later after we know more about the compressed data
61
62    let table_start = result.position();
63    pack_encoding_table(
64        &frequencies,
65        min_code_index,
66        max_code_index,
67        &mut result,
68    )?;
69
70    let data_start = result.position();
71    let bit_count = encode_with_frequencies(
72        &frequencies,
73        uncompressed,
74        max_code_index,
75        &mut result
76    )?;
77
78    // write meta data after this
79    result.set_position(0);
80    let table_length = data_start - table_start;
81
82    u32::try_from(min_code_index)?.write(&mut result)?;
83    u32::try_from(max_code_index)?.write(&mut result)?;
84    u32::try_from(table_length)?.write(&mut result)?;
85    u32::try_from(bit_count)?.write(&mut result)?;
86    0_u32.write(&mut result)?;
87
88    Ok(result.into_inner())
89}
90
91
92const ENCODE_BITS: u64 = 16; // literal (value) bit length
93const DECODE_BITS: u64 = 14; // decoding bit size (>= 8)
94
95const ENCODING_TABLE_SIZE: usize = ((1 << ENCODE_BITS) + 1) as usize;
96const DECODING_TABLE_SIZE: usize = (1 << DECODE_BITS) as usize;
97const DECODE_MASK: u64 = DECODING_TABLE_SIZE as u64 - 1;
98
99const SHORT_ZEROCODE_RUN: u64 = 59;
100const LONG_ZEROCODE_RUN: u64 = 63;
101const SHORTEST_LONG_RUN: u64 = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
102const LONGEST_LONG_RUN: u64 = 255 + SHORTEST_LONG_RUN;
103
104
105#[derive(Clone, Debug, Eq, PartialEq)]
106enum Code {
107    Empty,
108    Short(ShortCode),
109    Long(SmallVec<[u32; 2]>), // often 2, sometimes 4, rarely 8
110}
111
112#[derive(Clone, Debug, Eq, PartialEq)]
113struct ShortCode {
114    value: u32,
115    len: u8,
116}
117
118impl ShortCode {
119    #[inline] fn len(&self) -> u64 { u64::from(self.len) }
120}
121
122/// Decode (uncompress) n bits based on encoding & decoding tables:
123fn decode_with_tables(
124    encoding_table: &[u64],
125    decoding_table: &[Code],
126    mut input: &[u8],
127    input_bit_count: i32,
128    run_length_code: u32,
129    expected_output_size: usize,
130) -> Result<Vec<u16>>
131{
132    let mut output = Vec::with_capacity(expected_output_size);
133    let mut code_bits = 0_u64;
134    let mut code_bit_count = 0_u64;
135
136    while input.len() > 0 {
137        read_byte(&mut code_bits, &mut code_bit_count, &mut input)?;
138
139        // Access decoding table
140        while code_bit_count >= DECODE_BITS {
141            let code_index = (code_bits >> (code_bit_count - DECODE_BITS)) & DECODE_MASK;
142            let code = &decoding_table[u64_to_usize(code_index)];
143
144            // Get short code
145            if let Code::Short(code) = code {
146                code_bit_count -= code.len();
147
148                read_code_into_vec(
149                    code.value,
150                    run_length_code,
151                    &mut code_bits,
152                    &mut code_bit_count,
153                    &mut input,
154                    &mut output,
155                    expected_output_size,
156                )?;
157            }
158            else if let Code::Long(ref long_codes) = code {
159                debug_assert_ne!(long_codes.len(), 0);
160
161                let long_code = long_codes.iter()
162                    .filter_map(|&long_code|{
163                        let encoded_long_code = encoding_table[u32_to_usize(long_code)];
164                        let length = length(encoded_long_code);
165
166                        while code_bit_count < length && input.len() > 0 {
167                            let err = read_byte(&mut code_bits, &mut code_bit_count, &mut input);
168                            if let Err(err) = err { return Some(Err(err)); }
169                        }
170
171                        if code_bit_count >= length {
172                            let required_code = (code_bits >> (code_bit_count - length)) & ((1 << length) - 1);
173
174                            if self::code(encoded_long_code) == required_code {
175                                code_bit_count -= length;
176                                return Some(Ok(long_code));
177                            }
178                        }
179
180                        None
181
182                    })
183                    .next()
184                    .ok_or(Error::invalid(INVALID_CODE))?;
185
186                read_code_into_vec(
187                    long_code?,
188                    run_length_code,
189                    &mut code_bits,
190                    &mut code_bit_count,
191                    &mut input,
192                    &mut output,
193                    expected_output_size,
194                )?;
195            }
196            else {
197                return Err(Error::invalid(INVALID_CODE));
198            }
199        }
200    }
201
202    let count = u64::try_from((8 - input_bit_count) & 7)?;
203    code_bits >>= count;
204
205    code_bit_count = code_bit_count.checked_sub(count)
206        .ok_or_else(|| Error::invalid("code"))?;
207
208    while code_bit_count > 0 {
209        let index = (code_bits << (DECODE_BITS - code_bit_count)) & DECODE_MASK;
210        let code = &decoding_table[u64_to_usize(index)];
211
212        if let Code::Short(short_code) = code {
213            if short_code.len() > code_bit_count { return Err(Error::invalid("code")) }; // FIXME why does this happen??
214            code_bit_count -= short_code.len(); // FIXME may throw "attempted to subtract with overflow"
215
216            read_code_into_vec(
217                short_code.value,
218                run_length_code,
219                &mut code_bits,
220                &mut code_bit_count,
221                &mut input,
222                &mut output,
223                expected_output_size,
224            )?;
225        }
226        else {
227            return Err(Error::invalid(INVALID_CODE));
228        }
229    }
230
231    if output.len() != expected_output_size {
232        return Err(Error::invalid(NOT_ENOUGH_DATA));
233    }
234
235    Ok(output)
236}
237
238/// Build a decoding hash table based on the encoding table code:
239///	- short codes (<= HUF_DECBITS) are resolved with a single table access;
240///	- long code entry allocations are not optimized, because long codes are
241///	  unfrequent;
242///	- decoding tables are used by hufDecode();
243fn build_decoding_table(
244    encoding_table: &[u64],
245    min_code_index: usize,
246    max_code_index: usize,
247) -> Result<Vec<Code>>
248{
249    let mut decoding_table = vec![Code::Empty; DECODING_TABLE_SIZE]; // not an array because of code not being copy
250
251    for (code_index, &encoded_code) in encoding_table[..= max_code_index].iter().enumerate().skip(min_code_index) {
252        let code_index = u32::try_from(code_index).unwrap();
253
254        let code = code(encoded_code);
255        let length = length(encoded_code);
256
257        if code >> length != 0 {
258            return Err(Error::invalid(INVALID_TABLE_ENTRY));
259        }
260
261        if length > DECODE_BITS {
262            let long_code = &mut decoding_table[u64_to_usize(code >> (length - DECODE_BITS))];
263
264            match long_code {
265                Code::Empty => *long_code = Code::Long(smallvec![code_index]),
266                Code::Long(lits) => lits.push(code_index),
267                _ => { return Err(Error::invalid(INVALID_TABLE_ENTRY)); }
268            }
269        }
270        else if length != 0 {
271            let default_value = Code::Short(ShortCode {
272                value: code_index,
273                len: length as u8,
274            });
275
276            let start_index = u64_to_usize(code << (DECODE_BITS - length));
277            let count = u64_to_usize(1 << (DECODE_BITS - length));
278
279            for value in &mut decoding_table[start_index .. start_index + count] {
280                *value = default_value.clone();
281            }
282        }
283    }
284
285    Ok(decoding_table)
286}
287
288/// Run-length-decompresses all zero runs from the packed table to the encoding table
289fn read_encoding_table(
290    packed: &mut impl Read,
291    min_code_index: usize,
292    max_code_index: usize,
293) -> Result<Vec<u64>>
294{
295    let mut code_bits = 0_u64;
296    let mut code_bit_count = 0_u64;
297
298    // TODO push() into encoding table instead of index stuff?
299    let mut encoding_table = vec![0_u64; ENCODING_TABLE_SIZE];
300    let mut code_index = min_code_index;
301    while code_index <= max_code_index {
302        let code_len = read_bits(6, &mut code_bits, &mut code_bit_count, packed)?;
303        encoding_table[code_index] = code_len;
304
305        if code_len == LONG_ZEROCODE_RUN {
306            let zerun_bits = read_bits(8, &mut code_bits, &mut code_bit_count, packed)?;
307            let zerun = usize::try_from(zerun_bits + SHORTEST_LONG_RUN).unwrap();
308
309            if code_index + zerun > max_code_index + 1 {
310                return Err(Error::invalid(TABLE_TOO_LONG));
311            }
312
313            for value in &mut encoding_table[code_index..code_index + zerun] {
314                *value = 0;
315            }
316
317            code_index += zerun;
318        }
319        else if code_len >= SHORT_ZEROCODE_RUN {
320            let duplication_count = usize::try_from(code_len - SHORT_ZEROCODE_RUN + 2).unwrap();
321            if code_index + duplication_count > max_code_index + 1 {
322                return Err(Error::invalid(TABLE_TOO_LONG));
323            }
324
325            for value in &mut encoding_table[code_index .. code_index + duplication_count] {
326                *value = 0;
327            }
328
329            code_index += duplication_count;
330        }
331        else {
332            code_index += 1;
333        }
334    }
335
336    build_canonical_table(&mut encoding_table);
337    Ok(encoding_table)
338}
339
340// TODO Use BitStreamReader for all the bit reads?!
341#[inline]
342fn read_bits(
343    count: u64,
344    code_bits: &mut u64,
345    code_bit_count: &mut u64,
346    input: &mut impl Read,
347) -> Result<u64>
348{
349    while *code_bit_count < count {
350        read_byte(code_bits, code_bit_count, input)?;
351    }
352
353    *code_bit_count -= count;
354    Ok((*code_bits >> *code_bit_count) & ((1 << count) - 1))
355}
356
357#[inline]
358fn read_byte(code_bits: &mut u64, bit_count: &mut u64, input: &mut impl Read) -> UnitResult {
359    *code_bits = (*code_bits << 8) | u8::read(input)? as u64;
360    *bit_count += 8;
361    Ok(())
362}
363
364#[inline]
365fn read_code_into_vec(
366    code: u32,
367    run_length_code: u32,
368    code_bits: &mut u64,
369    code_bit_count: &mut u64,
370    read: &mut impl Read,
371    out: &mut Vec<u16>,
372    max_len: usize,
373) -> UnitResult
374{
375    if code == run_length_code { // code may be too large for u16
376        if *code_bit_count < 8 {
377            read_byte(code_bits, code_bit_count, read)?;
378        }
379
380        *code_bit_count -= 8;
381
382        let code_repetitions = usize::from((*code_bits >> *code_bit_count) as u8);
383
384        if out.len() + code_repetitions > max_len {
385            return Err(Error::invalid(TOO_MUCH_DATA));
386        }
387        else if out.is_empty() {
388            return Err(Error::invalid(NOT_ENOUGH_DATA));
389        }
390
391        let repeated_code = *out.last().unwrap();
392        out.extend(std::iter::repeat(repeated_code).take(code_repetitions));
393    }
394    else if out.len() < max_len { // implies that code is not larger than u16???
395        out.push(u16::try_from(code)?);
396    }
397    else {
398        return Err(Error::invalid(TOO_MUCH_DATA));
399    }
400
401    Ok(())
402}
403
404fn count_frequencies(data: &[u16]) -> Vec<u64> {
405    let mut frequencies = vec![0_u64; ENCODING_TABLE_SIZE];
406
407    for value in data {
408        frequencies[*value as usize] += 1;
409    }
410
411    frequencies
412}
413
414fn write_bits(
415    count: u64,
416    bits: u64,
417    code_bits: &mut u64,
418    code_bit_count: &mut u64,
419    mut out: impl Write,
420) -> UnitResult
421{
422    *code_bits = (*code_bits << count) | bits;
423    *code_bit_count += count;
424
425    while *code_bit_count >= 8 {
426        *code_bit_count -= 8;
427        out.write(&[
428            (*code_bits >> *code_bit_count) as u8 // TODO make sure never or always wraps?
429        ])?;
430    }
431
432    Ok(())
433}
434
435fn write_code(scode: u64, code_bits: &mut u64, code_bit_count: &mut u64, mut out: impl Write) -> UnitResult {
436    write_bits(length(scode), code(scode), code_bits, code_bit_count, &mut out)
437}
438
439#[inline(always)]
440fn send_code(
441    scode: u64,
442    run_count: u64,
443    run_code: u64,
444    code_bits: &mut u64,
445    code_bit_count: &mut u64,
446    mut out: impl Write,
447) -> UnitResult
448{
449    // Output a run of runCount instances of the symbol sCount.
450    // Output the symbols explicitly, or if that is shorter, output
451    // the sCode symbol once followed by a runCode symbol and runCount
452    // expressed as an 8-bit number.
453    if length(scode) + length(run_code) + 8 < length(scode) * run_count {
454        write_code(scode, code_bits, code_bit_count, &mut out)?;
455        write_code(run_code, code_bits, code_bit_count, &mut out)?;
456        write_bits(8, run_count, code_bits, code_bit_count, &mut out)?;
457    }
458    else {
459        for _ in 0 ..= run_count {
460            write_code(scode, code_bits, code_bit_count, &mut out)?;
461        }
462    }
463
464    Ok(())
465}
466
467fn encode_with_frequencies(
468    frequencies: &[u64],
469    uncompressed: &[u16],
470    run_length_code: usize,
471    mut out: &mut Cursor<Vec<u8>>,
472) -> Result<u64>
473{
474    let mut code_bits = 0;
475    let mut code_bit_count = 0;
476
477    let mut run_start_value = uncompressed[0];
478    let mut run_length = 0;
479
480    let start_position = out.position();
481
482    // Loop on input values
483    for &current_value in &uncompressed[1..] {
484        // Count same values or send code
485        if run_start_value == current_value && run_length < 255 {
486            run_length += 1;
487        }
488        else {
489            send_code(
490                frequencies[run_start_value as usize],
491                run_length,
492                frequencies[run_length_code],
493                &mut code_bits,
494                &mut code_bit_count,
495                &mut out,
496            )?;
497
498            run_length = 0;
499        }
500
501        run_start_value = current_value;
502    }
503
504    // Send remaining code
505    send_code(
506        frequencies[run_start_value as usize],
507        run_length,
508        frequencies[run_length_code],
509        &mut code_bits,
510        &mut code_bit_count,
511        &mut out,
512    )?;
513
514    let data_length = out.position() - start_position; // we shouldn't count the last byte write
515
516    if code_bit_count != 0 {
517        out.write(&[
518            (code_bits << (8 - code_bit_count) & 0xff) as u8
519        ])?;
520    }
521
522    Ok(data_length * 8 + code_bit_count)
523}
524
525///
526/// Pack an encoding table:
527///	- only code lengths, not actual codes, are stored
528///	- runs of zeroes are compressed as follows:
529///
530///	  unpacked		packed
531///	  --------------------------------
532///	  1 zero		0	(6 bits)
533///	  2 zeroes		59
534///	  3 zeroes		60
535///	  4 zeroes		61
536///	  5 zeroes		62
537///	  n zeroes (6 or more)	63 n-6	(6 + 8 bits)
538///
539fn pack_encoding_table(
540    frequencies: &[u64],
541    min_index: usize,
542    max_index: usize,
543    mut out: &mut Cursor<Vec<u8>>,
544) -> UnitResult
545{
546    let mut code_bits = 0_u64;
547    let mut code_bit_count = 0_u64;
548
549    let mut frequency_index = min_index;
550    while frequency_index <= max_index { // TODO slice iteration?
551        let code_length = length(frequencies[frequency_index]);
552
553        if code_length == 0 {
554            let mut zero_run = 1;
555
556            while frequency_index < max_index && zero_run < LONGEST_LONG_RUN {
557                if length(frequencies[frequency_index + 1]) > 0 {
558                    break;
559                }
560
561                frequency_index += 1;
562                zero_run += 1;
563            }
564
565            if zero_run >= 2 {
566                if zero_run >= SHORTEST_LONG_RUN {
567                    write_bits(6, LONG_ZEROCODE_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
568                    write_bits(8, zero_run - SHORTEST_LONG_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
569                }
570                else {
571                    write_bits(6, SHORT_ZEROCODE_RUN + zero_run - 2, &mut code_bits, &mut code_bit_count, &mut out)?;
572                }
573
574                frequency_index += 1; // we must increment or else this may go very wrong
575                continue;
576            }
577        }
578
579        write_bits(6, code_length, &mut code_bits, &mut code_bit_count, &mut out)?;
580        frequency_index += 1;
581    }
582
583    if code_bit_count > 0 {
584        out.write(&[
585            (code_bits << (8 - code_bit_count)) as u8
586        ])?;
587    }
588
589    Ok(())
590}
591
592/// Build a "canonical" Huffman code table:
593///	- for each (uncompressed) symbol, code contains the length
594///	  of the corresponding code (in the compressed data)
595///	- canonical codes are computed and stored in code
596///	- the rules for constructing canonical codes are as follows:
597///	  * shorter codes (if filled with zeroes to the right)
598///	    have a numerically higher value than longer codes
599///	  * for codes with the same length, numerical values
600///	    increase with numerical symbol values
601///	- because the canonical code table can be constructed from
602///	  symbol lengths alone, the code table can be transmitted
603///	  without sending the actual code values
604///	- see http://www.compressconsult.com/huffman/
605fn build_canonical_table(code_table: &mut [u64]) {
606    debug_assert_eq!(code_table.len(), ENCODING_TABLE_SIZE);
607
608    let mut count_per_code = [0_u64; 59];
609
610    for &code in code_table.iter() {
611        count_per_code[u64_to_usize(code)] += 1;
612    }
613
614    // For each i from 58 through 1, compute the
615    // numerically lowest code with length i, and
616    // store that code in n[i].
617    {
618        let mut code = 0_u64; // TODO use foldr?
619        for count in &mut count_per_code.iter_mut().rev() {
620            let next_code = (code + *count) >> 1;
621            *count = code;
622            code = next_code;
623        }
624    }
625
626    // code[i] contains the length, l, of the
627    // code for symbol i.  Assign the next available
628    // code of length l to the symbol and store both
629    // l and the code in code[i]. // TODO iter + filter ?
630    for symbol_length in code_table.iter_mut() {
631        let current_length = *symbol_length;
632        let code_index = u64_to_usize(current_length);
633        if current_length > 0 {
634            *symbol_length = current_length | (count_per_code[code_index] << 6);
635            count_per_code[code_index] += 1;
636        }
637    }
638}
639
640
641/// Compute Huffman codes (based on frq input) and store them in frq:
642///	- code structure is : [63:lsb - 6:msb] | [5-0: bit length];
643///	- max code length is 58 bits;
644///	- codes outside the range [im-iM] have a null length (unused values);
645///	- original frequencies are destroyed;
646///	- encoding tables are used by hufEncode() and hufBuildDecTable();
647///
648/// NB: The following code "(*a == *b) && (a > b))" was added to ensure
649///     elements in the heap with the same value are sorted by index.
650///     This is to ensure, the STL make_heap()/pop_heap()/push_heap() methods
651///     produced a resultant sorted heap that is identical across OSes.
652fn build_encoding_table(
653    frequencies: &mut [u64], // input frequencies, output encoding table
654) -> (usize, usize) // return frequency max min range
655{
656    debug_assert_eq!(frequencies.len(), ENCODING_TABLE_SIZE);
657
658    /// Frequency with position, used for MinHeap.
659    #[derive(Eq, PartialEq, Copy, Clone)]
660    struct HeapFrequency {
661        position: usize,
662        frequency: u64,
663    }
664
665    impl Ord for HeapFrequency {
666        fn cmp(&self, other: &Self) -> Ordering {
667            other.frequency.cmp(&self.frequency)
668                .then_with(|| other.position.cmp(&self.position))
669        }
670    }
671
672    impl PartialOrd for HeapFrequency {
673        fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
674    }
675
676    // This function assumes that when it is called, array frq
677    // indicates the frequency of all possible symbols in the data
678    // that are to be Huffman-encoded.  (frq[i] contains the number
679    // of occurrences of symbol i in the data.)
680    //
681    // The loop below does three things:
682    //
683    // 1) Finds the minimum and maximum indices that point
684    //    to non-zero entries in frq:
685    //
686    //     frq[im] != 0, and frq[i] == 0 for all i < im
687    //     frq[iM] != 0, and frq[i] == 0 for all i > iM
688    //
689    // 2) Fills array fHeap with pointers to all non-zero
690    //    entries in frq.
691    //
692    // 3) Initializes array hlink such that hlink[i] == i
693    //    for all array entries.
694
695    // We need to use vec here or we overflow the stack.
696    let mut links = vec![0_usize; ENCODING_TABLE_SIZE];
697    let mut frequency_heap = vec![0_usize; ENCODING_TABLE_SIZE];
698
699    // This is a good solution since we don't have usize::MAX items (no panics or UB),
700    // and since this is short-circuit, it stops at the first in order non zero element.
701    let min_frequency_index = frequencies.iter().position(|f| *f != 0).unwrap_or(0);
702
703    let mut max_frequency_index = 0;
704    let mut frequency_count = 0;
705
706    // assert bounds check to optimize away bounds check in loops
707    assert!(links.len() >= ENCODING_TABLE_SIZE);
708    assert!(frequencies.len() >= ENCODING_TABLE_SIZE);
709
710    for index in min_frequency_index..ENCODING_TABLE_SIZE {
711        links[index] = index; // TODO for x in links.iter().enumerate()
712
713        if frequencies[index] != 0 {
714            frequency_heap[frequency_count] = index;
715            max_frequency_index = index;
716            frequency_count += 1;
717        }
718    }
719
720
721    // Add a pseudo-symbol, with a frequency count of 1, to frq;
722    // adjust the fHeap and hlink array accordingly.  Function
723    // hufEncode() uses the pseudo-symbol for run-length encoding.
724
725    max_frequency_index += 1;
726    frequencies[max_frequency_index] = 1;
727    frequency_heap[frequency_count] = max_frequency_index;
728    frequency_count += 1;
729
730    // Build an array, scode, such that scode[i] contains the number
731    // of bits assigned to symbol i.  Conceptually this is done by
732    // constructing a tree whose leaves are the symbols with non-zero
733    // frequency:
734    //
735    //     Make a heap that contains all symbols with a non-zero frequency,
736    //     with the least frequent symbol on top.
737    //
738    //     Repeat until only one symbol is left on the heap:
739    //
740    //         Take the two least frequent symbols off the top of the heap.
741    //         Create a new node that has first two nodes as children, and
742    //         whose frequency is the sum of the frequencies of the first
743    //         two nodes.  Put the new node back into the heap.
744    //
745    // The last node left on the heap is the root of the tree.  For each
746    // leaf node, the distance between the root and the leaf is the length
747    // of the code for the corresponding symbol.
748    //
749    // The loop below doesn't actually build the tree; instead we compute
750    // the distances of the leaves from the root on the fly.  When a new
751    // node is added to the heap, then that node's descendants are linked
752    // into a single linear list that starts at the new node, and the code
753    // lengths of the descendants (that is, their distance from the root
754    // of the tree) are incremented by one.
755    let mut heap = BinaryHeap::with_capacity(frequency_count);
756    for index in frequency_heap.drain(..frequency_count) {
757        heap.push(HeapFrequency { position: index, frequency: frequencies[index] });
758    }
759
760    let mut s_code = vec![0_u64; ENCODING_TABLE_SIZE];
761
762    while frequency_count > 1 {
763        // Find the indices, mm and m, of the two smallest non-zero frq
764        // values in fHeap, add the smallest frq to the second-smallest
765        // frq, and remove the smallest frq value from fHeap.
766        let (high_position, low_position) = {
767            let smallest_frequency = heap.pop().expect("heap empty bug");
768            frequency_count -= 1;
769
770            let mut second_smallest_frequency = heap.peek_mut().expect("heap empty bug");
771            second_smallest_frequency.frequency += smallest_frequency.frequency;
772
773            (second_smallest_frequency.position, smallest_frequency.position)
774        };
775
776        // The entries in scode are linked into lists with the
777        // entries in hlink serving as "next" pointers and with
778        // the end of a list marked by hlink[j] == j.
779        //
780        // Traverse the lists that start at scode[m] and scode[mm].
781        // For each element visited, increment the length of the
782        // corresponding code by one bit. (If we visit scode[j]
783        // during the traversal, then the code for symbol j becomes
784        // one bit longer.)
785        //
786        // Merge the lists that start at scode[m] and scode[mm]
787        // into a single list that starts at scode[m].
788
789        // Add a bit to all codes in the first list.
790        let mut index = high_position; // TODO fold()
791        loop {
792            s_code[index] += 1;
793            debug_assert!(s_code[index] <= 58);
794
795            // merge the two lists
796            if links[index] == index {
797                links[index] = low_position;
798                break;
799            }
800
801            index = links[index];
802        }
803
804        // Add a bit to all codes in the second list
805        let mut index = low_position; // TODO fold()
806        loop {
807            s_code[index] += 1;
808            debug_assert!(s_code[index] <= 58);
809
810            if links[index] == index {
811                break;
812            }
813
814            index = links[index];
815        }
816    }
817
818    // Build a canonical Huffman code table, replacing the code
819    // lengths in scode with (code, code length) pairs.  Copy the
820    // code table from scode into frq.
821    build_canonical_table(&mut s_code);
822    frequencies.copy_from_slice(&s_code);
823
824    (min_frequency_index, max_frequency_index)
825}
826
827
828#[inline] fn length(code: u64) -> u64 { code & 63 }
829#[inline] fn code(code: u64) -> u64 { code >> 6 }
830
831const INVALID_BIT_COUNT: &'static str = "invalid number of bits";
832const INVALID_TABLE_ENTRY: &'static str = "invalid code table entry";
833const NOT_ENOUGH_DATA: &'static str = "decoded data are shorter than expected";
834const INVALID_TABLE_SIZE: &'static str = "unexpected end of code table data";
835const TABLE_TOO_LONG: &'static str = "code table is longer than expected";
836const INVALID_CODE: &'static str = "invalid code";
837const TOO_MUCH_DATA: &'static str = "decoded data are longer than expected";
838
839
840#[cfg(test)]
841mod test {
842    use super::*;
843    use rand::{Rng, SeedableRng};
844
845    const UNCOMPRESSED_ARRAY: [u16; 100] = [
846        3852, 2432, 33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013, 7715, 59875, 28182,
847        34449, 19983, 20399, 63407, 29486, 4877, 26738, 44815, 14042, 46091, 48228, 25682, 35412,
848        7582, 65069, 6632, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
849        42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
850        56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
851        19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
852        3159, 29002, 14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347, 16991, 21303, 26263,
853        8312, 14017, 41777, 43240, 3500, 60250, 52437, 45715, 61520,
854    ];
855
856    const UNCOMPRESSED_ARRAY_SPECIAL: [u16; 100] = [
857        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28182,
858        0, 65534, 0, 65534, 0, 65534, 0, 65534, 0, 0, 0, 0, 0,
859        0, 0, 0, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
860        42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
861        56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
862        19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
863        3159, 29002, 14535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
864        65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
865    ];
866
867    const COMPRESSED_ARRAY: [u8; 703] = [
868        0xc9, 0x0, 0x0, 0x0, 0x2e, 0xfe, 0x0, 0x0, 0x56, 0x2, 0x0, 0x0, 0xa2, 0x2, 0x0, 0x0, 0x0,
869        0x0, 0x0, 0x0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd6, 0x47,
870        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x1f, 0xff, 0xff, 0xed, 0x87, 0xff, 0xff, 0xf0,
871        0x91, 0xff, 0xf8, 0x1f, 0xf4, 0xf1, 0xff, 0x78, 0x1f, 0xfd, 0xa1, 0xff, 0xff, 0xff, 0xff,
872        0xff, 0xff, 0xfa, 0xc7, 0xfe, 0x4, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
873        0xff, 0xed, 0x1f, 0xf3, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe8, 0x7, 0xfd, 0xf8,
874        0x7f, 0xff, 0xff, 0xff, 0xfd, 0x10, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x51, 0xff,
875        0xff, 0xff, 0xff, 0xfe, 0x1, 0xff, 0x73, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
876        0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0x0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
877        0xff, 0xff, 0xff, 0xfc, 0xa4, 0x7f, 0xf5, 0x7, 0xfc, 0x48, 0x7f, 0xe0, 0x47, 0xff, 0xff,
878        0xf5, 0x91, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x21, 0xff,
879        0x7f, 0x1f, 0xf8, 0xd1, 0xff, 0xe7, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x1f, 0xf2, 0x91,
880        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1c, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xe7,
881        0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x8c, 0x7f, 0xff, 0xff, 0xc, 0x1f, 0xff, 0xff,
882        0xe5, 0x7, 0xff, 0xff, 0xfa, 0x81, 0xff, 0xff, 0xff, 0x20, 0x7f, 0xff, 0xff, 0xff, 0xff,
883        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
884        0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xbc, 0x7f, 0xff, 0xff, 0xff, 0xfc, 0x38, 0x7f, 0xff,
885        0xff, 0xff, 0xfc, 0xd0, 0x7f, 0xd3, 0xc7, 0xff, 0xff, 0xf7, 0x91, 0xff, 0xff, 0xff, 0xff,
886        0xfe, 0xc1, 0xff, 0xff, 0xff, 0xff, 0xf9, 0x61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7,
887        0x87, 0xff, 0xff, 0xfd, 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x87, 0xff, 0xff,
888        0xff, 0xff, 0xfe, 0x87, 0xff, 0x58, 0x7f, 0xff, 0xff, 0xff, 0xfd, 0xec, 0x7f, 0xff, 0xff,
889        0xff, 0xfe, 0xd0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0x6c, 0x7f, 0xcb, 0x47, 0xff, 0xff, 0xf3,
890        0x61, 0xff, 0xff, 0xff, 0x80, 0x7f, 0xe1, 0xc7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f,
891        0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
892        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x18, 0x1f, 0xff, 0xff,
893        0xff, 0xff, 0xff, 0xfd, 0xcc, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x11, 0xff, 0xff,
894        0xff, 0xff, 0xf8, 0x41, 0xff, 0xbc, 0x1f, 0xff, 0xff, 0xc4, 0x47, 0xff, 0xff, 0xf2, 0x91,
895        0xff, 0xe0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0x6d, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
896        0xff, 0xff, 0xff, 0xff, 0xff, 0x2, 0x1f, 0xf9, 0xe1, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xe1,
897        0xff, 0xff, 0xfd, 0xb0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe1, 0xff, 0xff, 0xff, 0xff,
898        0xff, 0xff, 0xff, 0xff, 0x5a, 0x1f, 0xfc, 0x81, 0xbf, 0x29, 0x1b, 0xff, 0xff, 0xff, 0xff,
899        0xff, 0xff, 0xff, 0xf3, 0x61, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc8, 0x1b,
900        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0xb1, 0xbf, 0xff, 0xfd, 0x80, 0x6f, 0xff,
901        0xff, 0xf, 0x1b, 0xf8, 0xc1, 0xbf, 0xff, 0xfc, 0xb4, 0x6f, 0xff, 0xff, 0xff, 0xff, 0xff,
902        0xff, 0xff, 0xda, 0x46, 0xfc, 0x54, 0x6f, 0xc9, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
903        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x21, 0x1b, 0xff, 0xff, 0xe0, 0x86, 0xff, 0xff,
904        0xff, 0xff, 0xe2, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
905        0xff, 0xff, 0xff, 0xff, 0xff, 0xf3, 0x91, 0xbf, 0xff, 0xfe, 0x24, 0x6f, 0xff, 0xff, 0x6b,
906        0x1b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xb1, 0xbf, 0xfa, 0x1b, 0xfb, 0x11,
907        0xbf, 0xff, 0xfe, 0x8, 0x6f, 0xff, 0xff, 0x42, 0x1b, 0xff, 0xff, 0xff, 0xff, 0xb9, 0x1b,
908        0xff, 0xff, 0xcf, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x31,
909        0x86, 0x10, 0x9, 0xb4, 0xe4, 0x4c, 0xf7, 0xef, 0x42, 0x87, 0x6a, 0xb5, 0xc2, 0x34, 0x9e,
910        0x2f, 0x12, 0xae, 0x21, 0x68, 0xf2, 0xa8, 0x74, 0x37, 0xe1, 0x98, 0x14, 0x59, 0x57, 0x2c,
911        0x24, 0x3b, 0x35, 0x6c, 0x1b, 0x8b, 0xcc, 0xe6, 0x13, 0x38, 0xc, 0x8e, 0xe2, 0xc, 0xfe,
912        0x49, 0x73, 0xbc, 0x2b, 0x7b, 0x9, 0x27, 0x79, 0x14, 0xc, 0x94, 0x42, 0xf8, 0x7c, 0x1,
913        0x8d, 0x26, 0xde, 0x87, 0x26, 0x71, 0x50, 0x45, 0xc6, 0x28, 0x40, 0xd5, 0xe, 0x8d, 0x8,
914        0x1e, 0x4c, 0xa4, 0x79, 0x57, 0xf0, 0xc3, 0x6d, 0x5c, 0x6d, 0xc0,
915    ];
916
917    fn fill(rng: &mut impl Rng, size: usize) -> Vec<u16> {
918        if rng.gen_bool(0.2) {
919            let value = if rng.gen_bool(0.5) { 0 } else { u16::MAX };
920            return vec![ value; size ];
921        }
922
923        let mut data = vec![0_u16; size];
924
925        data.iter_mut().for_each(|v| {
926            *v = rng.gen_range(0_u16 .. u16::MAX);
927        });
928
929        data
930    }
931
932    /// Test using both input and output from a custom ILM OpenEXR test.
933    #[test]
934    fn compression_comparation() {
935        let raw = compress(&UNCOMPRESSED_ARRAY).unwrap();
936        assert_eq!(raw, COMPRESSED_ARRAY.to_vec());
937    }
938
939    #[test]
940    fn round_trip() {
941        let mut random = rand::rngs::StdRng::from_seed(SEED);
942        let raw = fill(&mut random, u16::MAX as usize);
943
944        let compressed = compress(&raw).unwrap();
945        let uncompressed = decompress(&compressed, raw.len()).unwrap();
946
947        assert_eq!(uncompressed, raw);
948    }
949
950    #[test]
951    fn repetitions_special() {
952        let raw = UNCOMPRESSED_ARRAY_SPECIAL;
953
954        let compressed = compress(&raw).unwrap();
955        let uncompressed = decompress(&compressed, raw.len()).unwrap();
956
957        assert_eq!(uncompressed, raw.to_vec());
958    }
959
960    #[test]
961    fn round_trip100() {
962        let mut random = rand::rngs::StdRng::from_seed(SEED);
963
964        for size_multiplier in 1..10 {
965            let raw = fill(&mut random, size_multiplier * 50_000);
966
967            let compressed = compress(&raw).unwrap();
968            let uncompressed = decompress(&compressed, raw.len()).unwrap();
969
970            assert_eq!(uncompressed, raw);
971        }
972    }
973
974    #[test]
975    fn test_zeroes(){
976        let uncompressed: &[u16] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
977
978        let compressed = compress(uncompressed).unwrap();
979        let decompressed = decompress(&compressed, uncompressed.len()).unwrap();
980
981        assert_eq!(uncompressed, decompressed.as_slice());
982    }
983
984    const SEED: [u8; 32] = [
985        12,155,32,34,112,109,98,54,
986        12,255,32,34,112,109,98,55,
987        12,155,32,34,12,109,98,54,
988        12,35,32,34,112,109,48,54,
989    ];
990}
exr/compression/piz/huffman.rs

exr/compression/piz/
huffman.rs