tiff/encoder/compression/
packbits.rs

1use crate::{encoder::compression::*, tags::CompressionMethod};
2use std::io::{BufWriter, Error, ErrorKind, Write};
3
4/// Compressor that uses the Packbits[^note] algorithm to compress bytes.
5///
6/// [^note]: PackBits is often ineffective on continuous tone images,
7///          including many grayscale images. In such cases, it is better
8///          to leave the image uncompressed.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
10pub struct Packbits;
11
12impl Compression for Packbits {
13    const COMPRESSION_METHOD: CompressionMethod = CompressionMethod::PackBits;
14
15    fn get_algorithm(&self) -> Compressor {
16        Compressor::Packbits(*self)
17    }
18}
19
20impl CompressionAlgorithm for Packbits {
21    fn write_to<W: Write>(&mut self, writer: &mut W, bytes: &[u8]) -> Result<u64, io::Error> {
22        // Inspired by https://github.com/skirridsystems/packbits
23
24        const MIN_REPT: u8 = 3; // Minimum run to compress between differ blocks
25        const MAX_BYTES: u8 = 128; // Maximum number of bytes that can be encoded in a header byte
26
27        // Encoding for header byte based on number of bytes represented.
28        fn encode_diff(n: u8) -> u8 {
29            n - 1
30        }
31        fn encode_rept(n: u8) -> u8 {
32            let var = 256 - (n - 1) as u16;
33            var as u8
34        }
35
36        fn write_u8<W: Write>(writer: &mut W, byte: u8) -> Result<u64, Error> {
37            writer.write(&[byte]).map(|byte_count| byte_count as u64)
38        }
39
40        let mut bufwriter = BufWriter::new(writer);
41        let mut bytes_written = 0u64; // The number of bytes written into the writer
42        let mut offset: Option<u64> = None; // The index of the first byte written into the writer
43
44        let mut src_index: usize = 0; // Index of the current byte
45        let mut src_count = bytes.len(); //The number of bytes remaining to be compressed
46
47        let mut in_run = false; // Indication whether counting of similar bytes is performed
48        let mut run_index = 0u8; // Distance into pending bytes that a run starts
49
50        let mut bytes_pending = 0u8; // Bytes looked at but not yet output
51        let mut pending_index = 0usize; // Index of the first pending byte
52
53        let mut curr_byte: u8; // Byte currently being considered
54        let mut last_byte: u8; // Previous byte
55
56        // Need at least one byte to compress
57        if src_count == 0 {
58            return Err(Error::new(ErrorKind::WriteZero, "write zero"));
59        }
60
61        // Prime compressor with first character.
62        last_byte = bytes[src_index];
63        src_index += 1;
64        bytes_pending += 1;
65
66        while src_count - 1 != 0 {
67            src_count -= 1;
68            curr_byte = bytes[src_index];
69            src_index += 1;
70            bytes_pending += 1;
71
72            if in_run {
73                if (curr_byte != last_byte) || (bytes_pending > MAX_BYTES) {
74                    offset.get_or_insert(write_u8(&mut bufwriter, encode_rept(bytes_pending - 1))?);
75                    write_u8(&mut bufwriter, last_byte)?;
76                    bytes_written += 2;
77
78                    bytes_pending = 1;
79                    pending_index = src_index - 1;
80                    run_index = 0;
81                    in_run = false;
82                }
83            } else if bytes_pending > MAX_BYTES {
84                // We have as much differing data as we can output in one chunk.
85                // Output MAX_BYTES leaving one byte.
86                offset.get_or_insert(write_u8(&mut bufwriter, encode_diff(MAX_BYTES))?);
87                bufwriter.write_all(&bytes[pending_index..pending_index + MAX_BYTES as usize])?;
88                bytes_written += 1 + MAX_BYTES as u64;
89
90                pending_index += MAX_BYTES as usize;
91                bytes_pending -= MAX_BYTES;
92                run_index = bytes_pending - 1; // A run could start here
93            } else if curr_byte == last_byte {
94                if (bytes_pending - run_index >= MIN_REPT) || (run_index == 0) {
95                    // This is a worthwhile run
96                    if run_index != 0 {
97                        // Flush differing data out of input buffer
98                        offset.get_or_insert(write_u8(&mut bufwriter, encode_diff(run_index))?);
99                        bufwriter
100                            .write_all(&bytes[pending_index..pending_index + run_index as usize])?;
101                        bytes_written += 1 + run_index as u64;
102                    }
103                    bytes_pending -= run_index; // Length of run
104                    in_run = true;
105                }
106            } else {
107                run_index = bytes_pending - 1; // A run could start here
108            }
109            last_byte = curr_byte;
110        }
111
112        // Output the remainder
113        if in_run {
114            bytes_written += 2;
115            offset.get_or_insert(write_u8(&mut bufwriter, encode_rept(bytes_pending))?);
116            write_u8(&mut bufwriter, last_byte)?;
117        } else {
118            bytes_written += 1 + bytes_pending as u64;
119            offset.get_or_insert(write_u8(&mut bufwriter, encode_diff(bytes_pending))?);
120            bufwriter.write_all(&bytes[pending_index..pending_index + bytes_pending as usize])?;
121        }
122
123        bufwriter.flush()?;
124        Ok(bytes_written)
125    }
126}
127
128#[cfg(test)]
129mod tests {
130    use super::*;
131    use crate::encoder::compression::tests::TEST_DATA;
132    use std::io::Cursor;
133
134    #[test]
135    fn test_packbits_single_byte() {
136        // compress single byte
137        const UNCOMPRESSED_DATA: [u8; 1] = [0x3F];
138        const EXPECTED_COMPRESSED_DATA: [u8; 2] = [0x00, 0x3F];
139
140        let mut compressed_data = Vec::<u8>::new();
141        let mut writer = Cursor::new(&mut compressed_data);
142        Packbits.write_to(&mut writer, &UNCOMPRESSED_DATA).unwrap();
143        assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA);
144    }
145
146    #[test]
147    fn test_packbits_rept() {
148        // compress buffer with repetitive sequence
149        const UNCOMPRESSED_DATA: &[u8] =
150            b"This strrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrring hangs.";
151        const EXPECTED_COMPRESSED_DATA: &[u8] = b"\x06This st\xD1r\x09ing hangs.";
152
153        let mut compressed_data = Vec::<u8>::new();
154        let mut writer = Cursor::new(&mut compressed_data);
155        Packbits.write_to(&mut writer, UNCOMPRESSED_DATA).unwrap();
156        assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA);
157    }
158
159    #[test]
160    fn test_packbits_large_rept_nonrept() {
161        // compress buffer with large repetitive and non-repetitive sequence
162        let mut data = b"This st".to_vec();
163        for _i in 0..158 {
164            data.push(b'r');
165        }
166        data.extend_from_slice(b"ing hangs.");
167        for i in 0..158 {
168            data.push(i);
169        }
170
171        const EXPECTED_COMPRESSED_DATA: [u8; 182] = [
172            0x06, 0x54, 0x68, 0x69, 0x73, 0x20, 0x73, 0x74, 0x81, 0x72, 0xE3, 0x72, 0x7F, 0x69,
173            0x6E, 0x67, 0x20, 0x68, 0x61, 0x6E, 0x67, 0x73, 0x2E, 0x00, 0x01, 0x02, 0x03, 0x04,
174            0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12,
175            0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
176            0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E,
177            0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C,
178            0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
179            0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
180            0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
181            0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74,
182            0x75, 0x27, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
183            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
184            0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D,
185        ];
186
187        let mut compressed_data = Vec::<u8>::new();
188        let mut writer = Cursor::new(&mut compressed_data);
189        Packbits.write_to(&mut writer, data.as_slice()).unwrap();
190        assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA);
191    }
192
193    #[test]
194    fn test_packbits() {
195        // compress teststring
196        const EXPECTED_COMPRESSED_DATA: &[u8] =
197            b"\x3CThis is a string for checking various compression algorithms.";
198
199        let mut compressed_data = Vec::<u8>::new();
200        let mut writer = Cursor::new(&mut compressed_data);
201        Packbits.write_to(&mut writer, TEST_DATA).unwrap();
202        assert_eq!(compressed_data, EXPECTED_COMPRESSED_DATA);
203    }
204}