// tiff_reader/filters.rs

1//! Compression filter pipeline for TIFF strip/tile decompression.
2
3#[cfg(any(feature = "jpeg", feature = "zstd"))]
4use std::io::Cursor;
5use std::io::Read;
6#[cfg(feature = "jpeg")]
7use std::panic::{self, AssertUnwindSafe};
8
9use crate::error::{Error, Result};
10use crate::header::ByteOrder;
11use tiff_core::{Compression, Predictor};
12
/// Decompress a strip or tile according to the TIFF compression scheme.
///
/// * `compression` - raw TIFF compression code, mapped via `Compression::from_code`.
/// * `data` - the compressed strip/tile bytes.
/// * `index` - strip/tile index, used only for error reporting.
/// * `_jpeg_tables` / `_decoded_len_limit` - consumed only by the JPEG path;
///   underscore-prefixed because they are unused when the `jpeg` feature is off.
///
/// Returns the decompressed bytes, `Error::UnsupportedCompression` for codes
/// that are unknown or whose backing feature is compiled out, or
/// `Error::DecompressionFailed` when a codec rejects the data.
pub fn decompress(
    compression: u16,
    data: &[u8],
    index: usize,
    _jpeg_tables: Option<&[u8]>,
    _decoded_len_limit: usize,
) -> Result<Vec<u8>> {
    match Compression::from_code(compression) {
        // Uncompressed: hand back an owned copy of the raw bytes.
        Some(Compression::None) => Ok(data.to_vec()),
        // Both deflate variants share the same zlib stream format.
        Some(Compression::Deflate | Compression::DeflateOld) => decompress_deflate(data, index),
        Some(Compression::Lzw) => decompress_lzw(data, index),
        Some(Compression::PackBits) => decompress_packbits(data, index),
        // Old-style JPEG is rejected even when the `jpeg` feature is enabled.
        #[cfg(feature = "jpeg")]
        Some(Compression::OldJpeg) => Err(Error::UnsupportedCompression(compression)),
        #[cfg(feature = "jpeg")]
        Some(Compression::Jpeg) => decompress_jpeg(data, index, _jpeg_tables, _decoded_len_limit),
        #[cfg(not(feature = "jpeg"))]
        Some(Compression::OldJpeg | Compression::Jpeg) => {
            Err(Error::UnsupportedCompression(compression))
        }
        #[cfg(feature = "zstd")]
        Some(Compression::Zstd) => decompress_zstd(data, index),
        #[cfg(not(feature = "zstd"))]
        Some(Compression::Zstd) => Err(Error::UnsupportedCompression(compression)),
        None => Err(Error::UnsupportedCompression(compression)),
    }
}
41
42/// Normalize row bytes into native-endian decoded samples and reverse any TIFF predictor.
43pub fn fix_endianness_and_predict(
44    row: &mut [u8],
45    bit_depth: u16,
46    samples: u16,
47    byte_order: ByteOrder,
48    predictor: u16,
49) -> Result<()> {
50    match Predictor::from_code(predictor) {
51        Some(Predictor::None) => {
52            fix_endianness(row, byte_order, bit_depth);
53            Ok(())
54        }
55        Some(Predictor::Horizontal) => {
56            fix_endianness(row, byte_order, bit_depth);
57            reverse_horizontal_predictor(row, bit_depth, samples);
58            Ok(())
59        }
60        Some(Predictor::FloatingPoint) => match bit_depth {
61            16 => {
62                let mut encoded = row.to_vec();
63                predict_f16(&mut encoded, row, samples);
64                Ok(())
65            }
66            32 => {
67                let mut encoded = row.to_vec();
68                predict_f32(&mut encoded, row, samples);
69                Ok(())
70            }
71            64 => {
72                let mut encoded = row.to_vec();
73                predict_f64(&mut encoded, row, samples);
74                Ok(())
75            }
76            _ => Err(Error::UnsupportedPredictor(3)),
77        },
78        None => Err(Error::UnsupportedPredictor(predictor)),
79    }
80}
81
82fn decompress_deflate(data: &[u8], index: usize) -> Result<Vec<u8>> {
83    use flate2::read::ZlibDecoder;
84
85    let mut decoder = ZlibDecoder::new(data);
86    let mut out = Vec::new();
87    decoder
88        .read_to_end(&mut out)
89        .map_err(|e| Error::DecompressionFailed {
90            index,
91            reason: format!("deflate: {e}"),
92        })?;
93    Ok(out)
94}
95
96fn decompress_lzw(data: &[u8], index: usize) -> Result<Vec<u8>> {
97    use weezl::decode::Decoder;
98    use weezl::BitOrder;
99
100    let mut decoder = Decoder::with_tiff_size_switch(BitOrder::Msb, 8);
101    decoder
102        .decode(data)
103        .map_err(|e| Error::DecompressionFailed {
104            index,
105            reason: format!("LZW: {e}"),
106        })
107}
108
109fn decompress_packbits(data: &[u8], index: usize) -> Result<Vec<u8>> {
110    let mut out = Vec::new();
111    let mut cursor = 0usize;
112
113    while cursor < data.len() {
114        let header = data[cursor] as i8;
115        cursor += 1;
116
117        if header >= 0 {
118            let count = header as usize + 1;
119            let end = cursor + count;
120            if end > data.len() {
121                return Err(Error::DecompressionFailed {
122                    index,
123                    reason: "PackBits literal run is truncated".into(),
124                });
125            }
126            out.extend_from_slice(&data[cursor..end]);
127            cursor = end;
128        } else if header != -128 {
129            if cursor >= data.len() {
130                return Err(Error::DecompressionFailed {
131                    index,
132                    reason: "PackBits repeat run is truncated".into(),
133                });
134            }
135            let count = (1i16 - header as i16) as usize;
136            let byte = data[cursor];
137            cursor += 1;
138            out.resize(out.len() + count, byte);
139        }
140    }
141
142    Ok(out)
143}
144
/// Decode a JPEG-compressed strip or tile.
///
/// Merges the shared `jpeg_tables` stream (if any) with the abbreviated scan
/// data, then decodes with `jpeg_decoder`. Both decoder errors and decoder
/// panics are reported as `Error::DecompressionFailed` for strip/tile `index`,
/// and the decoded output is capped at `decoded_len_limit` bytes.
#[cfg(feature = "jpeg")]
fn decompress_jpeg(
    data: &[u8],
    index: usize,
    jpeg_tables: Option<&[u8]>,
    decoded_len_limit: usize,
) -> Result<Vec<u8>> {
    let stream = merge_jpeg_stream(jpeg_tables, data);

    // Wrap the decoder in catch_unwind so a panic on malformed input is
    // reported as an error instead of unwinding into the caller.
    let outcome = panic::catch_unwind(AssertUnwindSafe(move || {
        let mut decoder = jpeg_decoder::Decoder::new(Cursor::new(stream));
        // Cap the decoder's internal buffer before touching the bitstream.
        decoder.set_max_decoding_buffer_size(decoded_len_limit);
        decoder.read_info()?;
        validate_jpeg_metadata_budget(&decoder, decoded_len_limit)?;
        decoder.decode()
    }));

    match outcome {
        Ok(Ok(pixels)) => Ok(pixels),
        Ok(Err(e)) => Err(Error::DecompressionFailed {
            index,
            reason: format!("JPEG: {e}"),
        }),
        Err(payload) => Err(Error::DecompressionFailed {
            index,
            reason: format!(
                "JPEG decoder panicked: {}",
                panic_payload_message(payload.as_ref())
            ),
        }),
    }
}
172
/// Reject JPEG frames whose decoded size would exceed the TIFF block budget.
///
/// Computed as `width * height * bytes_per_pixel` with overflow checks, and
/// compared against `decoded_len_limit` before any pixel data is decoded.
#[cfg(feature = "jpeg")]
fn validate_jpeg_metadata_budget<R: std::io::Read>(
    decoder: &jpeg_decoder::Decoder<R>,
    decoded_len_limit: usize,
) -> std::result::Result<(), jpeg_decoder::Error> {
    let info = match decoder.info() {
        Some(info) => info,
        None => {
            return Err(jpeg_decoder::Error::Format(
                "JPEG metadata missing after read_info".into(),
            ))
        }
    };

    // Any multiplication overflow means the frame is absurdly large anyway.
    let decoded_len = usize::from(info.width)
        .checked_mul(usize::from(info.height))
        .and_then(|pixels| pixels.checked_mul(info.pixel_format.pixel_bytes()))
        .ok_or_else(|| jpeg_decoder::Error::Format("JPEG decoded size overflow".into()))?;

    if decoded_len > decoded_len_limit {
        Err(jpeg_decoder::Error::Format(format!(
            "JPEG decoded size {decoded_len} exceeds TIFF block budget {decoded_len_limit}"
        )))
    } else {
        Ok(())
    }
}
192
/// Decompress a ZSTD stream, tagging failures with the strip/tile `index`.
#[cfg(feature = "zstd")]
fn decompress_zstd(data: &[u8], index: usize) -> Result<Vec<u8>> {
    match zstd::stream::decode_all(Cursor::new(data)) {
        Ok(decoded) => Ok(decoded),
        Err(e) => Err(Error::DecompressionFailed {
            index,
            reason: format!("ZSTD: {e}"),
        }),
    }
}
200
201#[cfg(feature = "jpeg")]
/// Build a standalone JPEG stream from optional shared tables plus scan data.
///
/// TIFF keeps shared tables (`jpeg_tables`) separate from each strip/tile's
/// abbreviated scan. This stitches them together: the tables' trailing EOI
/// and the scan's leading SOI markers are dropped, an SOI is guaranteed at
/// the front, and an EOI at the back.
fn merge_jpeg_stream(jpeg_tables: Option<&[u8]>, scan_data: &[u8]) -> Vec<u8> {
    const SOI: [u8; 2] = [0xff, 0xd8];
    const EOI: [u8; 2] = [0xff, 0xd9];

    let tables = match jpeg_tables {
        Some(tables) => tables,
        // No shared tables: the scan is already a complete stream.
        None => return scan_data.to_vec(),
    };

    let table_body = tables.strip_suffix(&EOI).unwrap_or(tables);
    let scan_body = scan_data.strip_prefix(&SOI).unwrap_or(scan_data);

    let mut merged = Vec::with_capacity(table_body.len() + scan_body.len() + 2);
    if !table_body.starts_with(&SOI) {
        merged.extend_from_slice(&SOI);
    }
    merged.extend_from_slice(table_body);
    merged.extend_from_slice(scan_body);
    if !merged.ends_with(&EOI) {
        merged.extend_from_slice(&EOI);
    }
    merged
}
230
231#[cfg(feature = "jpeg")]
/// Best-effort extraction of a human-readable message from a panic payload.
///
/// `panic!` payloads are usually a `&'static str` or a formatted `String`;
/// any other payload type is reported generically.
fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<String>()
        .cloned()
        .or_else(|| {
            payload
                .downcast_ref::<&'static str>()
                .map(|message| (*message).to_string())
        })
        .unwrap_or_else(|| "unknown panic payload".into())
}
241
242fn fix_endianness(buf: &mut [u8], byte_order: ByteOrder, bit_depth: u16) {
243    let host_is_little_endian = cfg!(target_endian = "little");
244    let data_is_little_endian = matches!(byte_order, ByteOrder::LittleEndian);
245    if host_is_little_endian == data_is_little_endian {
246        return;
247    }
248
249    let chunk = match bit_depth {
250        0..=8 => 1,
251        9..=16 => 2,
252        17..=32 => 4,
253        _ => 8,
254    };
255    if chunk == 1 {
256        return;
257    }
258
259    for value in buf.chunks_exact_mut(chunk) {
260        value.reverse();
261    }
262}
263
/// Undo TIFF horizontal differencing in place.
///
/// Values are interleaved by sample, so each value accumulates onto the value
/// `samples` positions earlier. The buffer must already be in native byte
/// order (the caller swaps bytes first).
fn reverse_horizontal_predictor(buf: &mut [u8], bit_depth: u16, samples: u16) {
    let bytes_per_value = match bit_depth {
        0..=8 => 1,
        9..=16 => 2,
        17..=32 => 4,
        _ => 8,
    };
    let lookback = usize::from(samples) * bytes_per_value;

    // One accumulation loop per value width; the macro keeps the three
    // multi-byte variants from being copy-pasted.
    macro_rules! accumulate {
        ($ty:ty, $width:expr) => {
            for index in (lookback..buf.len()).step_by($width) {
                let previous = <$ty>::from_ne_bytes(
                    buf[index - lookback..index - lookback + $width]
                        .try_into()
                        .unwrap(),
                );
                let current =
                    <$ty>::from_ne_bytes(buf[index..index + $width].try_into().unwrap());
                buf[index..index + $width]
                    .copy_from_slice(&current.wrapping_add(previous).to_ne_bytes());
            }
        };
    }

    match bytes_per_value {
        1 => {
            for index in lookback..buf.len() {
                buf[index] = buf[index].wrapping_add(buf[index - lookback]);
            }
        }
        2 => accumulate!(u16, 2),
        4 => accumulate!(u32, 4),
        _ => accumulate!(u64, 8),
    }
}
317
/// Reverse TIFF floating-point prediction for 16-bit samples.
///
/// `input` holds the encoded row: byte-wise deltas over two split byte planes
/// stored most-significant-plane first. `output` (same length) receives the
/// reconstructed values in native byte order.
fn predict_f16(input: &mut [u8], output: &mut [u8], samples: u16) {
    let stride = usize::from(samples);

    // Undo the byte-wise horizontal differencing.
    for i in stride..input.len() {
        input[i] = input[i].wrapping_add(input[i - stride]);
    }

    // Re-interleave the two byte planes (big-endian order) into host order.
    let half = input.len() / 2;
    for (i, chunk) in output.chunks_mut(2).enumerate() {
        let value = u16::from_be_bytes([input[i], input[half + i]]);
        chunk.copy_from_slice(&value.to_ne_bytes());
    }
}
330
/// Reverse TIFF floating-point prediction for 32-bit samples.
///
/// `input` holds the encoded row: byte-wise deltas over four split byte
/// planes stored most-significant-plane first. `output` (same length)
/// receives the reconstructed values in native byte order.
fn predict_f32(input: &mut [u8], output: &mut [u8], samples: u16) {
    let stride = usize::from(samples);

    // Undo the byte-wise horizontal differencing.
    for i in stride..input.len() {
        input[i] = input[i].wrapping_add(input[i - stride]);
    }

    // Re-interleave the four byte planes (big-endian order) into host order.
    let len = input.len();
    for (i, chunk) in output.chunks_mut(4).enumerate() {
        let value = u32::from_be_bytes([
            input[i],
            input[len / 4 + i],
            input[len / 2 + i],
            input[len / 4 * 3 + i],
        ]);
        chunk.copy_from_slice(&value.to_ne_bytes());
    }
}
345
/// Reverse TIFF floating-point prediction for 64-bit samples.
///
/// `input` holds the encoded row: byte-wise deltas over eight split byte
/// planes stored most-significant-plane first. `output` (same length)
/// receives the reconstructed values in native byte order.
fn predict_f64(input: &mut [u8], output: &mut [u8], samples: u16) {
    let stride = usize::from(samples);

    // Undo the byte-wise horizontal differencing.
    for i in stride..input.len() {
        input[i] = input[i].wrapping_add(input[i - stride]);
    }

    // Re-interleave the eight byte planes (big-endian order) into host order.
    let plane = input.len() / 8;
    for (i, chunk) in output.chunks_mut(8).enumerate() {
        let value = u64::from_be_bytes([
            input[i],
            input[plane + i],
            input[plane * 2 + i],
            input[plane * 3 + i],
            input[plane * 4 + i],
            input[plane * 5 + i],
            input[plane * 6 + i],
            input[plane * 7 + i],
        ]);
        chunk.copy_from_slice(&value.to_ne_bytes());
    }
}
364
#[cfg(test)]
mod tests {
    use std::path::Path;

    #[cfg(feature = "jpeg")]
    use super::{decompress, merge_jpeg_stream};
    use super::{decompress_lzw, decompress_packbits, fix_endianness_and_predict};
    use crate::header::ByteOrder;
    #[cfg(feature = "jpeg")]
    use tiff_core::Compression;

    // Horizontal predictor (code 2) on little-endian u16 deltas [1, 1, 2]
    // should accumulate to [1, 2, 4].
    #[test]
    fn horizontal_predictor_restores_u16_rows() {
        let mut row = vec![1, 0, 1, 0, 2, 0];
        fix_endianness_and_predict(&mut row, 16, 1, ByteOrder::LittleEndian, 2).unwrap();
        assert_eq!(row, vec![1, 0, 2, 0, 4, 0]);
    }

    // 0xff is a repeat-run control byte with no value byte following it,
    // so the decoder must report a truncation error rather than panic.
    #[test]
    fn packbits_decoder_rejects_truncated_repeat_run() {
        let err = decompress_packbits(&[0xff], 0).unwrap_err();
        assert!(err.to_string().contains("PackBits"));
    }

    // Real COG fixture: the same LZW tile must decode both with and without
    // its trailer bytes (offsets/lengths are byte positions in the fixture).
    #[test]
    fn lzw_real_cog_tile_requires_repeated_trailer_bytes() {
        let fixture = Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("../testdata/interoperability/gdal/gcore/data/cog/byte_little_endian_golden.tif");
        let bytes = std::fs::read(fixture).unwrap();

        let without_trailer = &bytes[570..570 + 1223];
        let with_trailer = &bytes[570..570 + 1227];

        assert!(decompress_lzw(without_trailer, 0).is_ok());
        assert!(decompress_lzw(with_trailer, 0).is_ok());
    }

    // Tables carry SOI..DQT..EOI; the scan has no SOI. The merged stream must
    // start with the table segment and be terminated by a single EOI.
    #[cfg(feature = "jpeg")]
    #[test]
    fn merges_jpeg_tables_with_abbreviated_scan() {
        let merged = merge_jpeg_stream(
            Some(&[0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0xff, 0xd9]),
            &[0xff, 0xda, 0x00, 0x08, 0x00],
        );
        assert_eq!(&merged[..6], &[0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43]);
        assert!(merged.ends_with(&[0xff, 0xd9]));
    }

    // A tiny valid JPEG whose SOF0 height/width bytes are then patched to
    // huge values; decode must fail on the metadata budget check, not OOM.
    #[cfg(feature = "jpeg")]
    #[test]
    fn jpeg_decoder_rejects_frame_sizes_that_exceed_tiff_budget() {
        let mut jpeg = vec![
            0xff, 0xd8, 0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x14, 0x00, 0x14, 0x01, 0x01, 0x11,
            0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x03, 0x04, 0x06, 0xff, 0xc4,
            0x00, 0x2a, 0x10, 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x05, 0x05, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x03, 0x04, 0x00, 0x18, 0x31, 0x41,
            0x13, 0x21, 0x51, 0x71, 0x05, 0x22, 0x61, 0x91, 0xb1, 0x14, 0x42, 0x62, 0xc1, 0xf0,
            0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0x75, 0xc5, 0xb7, 0xd2,
            0x31, 0x4a, 0x75, 0x51, 0xe0, 0x65, 0xf2, 0x19, 0xd8, 0x8d, 0x7d, 0xfe, 0x71, 0x19,
            0x2b, 0x94, 0x54, 0x2c, 0x33, 0x38, 0x20, 0x2f, 0x7d, 0xf5, 0xd2, 0x40, 0x18, 0x6b,
            0xdc, 0x3d, 0xa0, 0x44, 0x15, 0xc9, 0x2c, 0xa1, 0xc8, 0x5c, 0xa4, 0x2c, 0xed, 0xcc,
            0x74, 0x83, 0xcb, 0xaf, 0x59, 0xc2, 0xaf, 0x0f, 0x02, 0xb3, 0x2e, 0x57, 0xfc, 0x79,
            0x15, 0x9f, 0x58, 0xee, 0x3f, 0x7b, 0xe0, 0x59, 0x95, 0x84, 0x26, 0x56, 0xac, 0xc2,
            0x62, 0xa0, 0x8c, 0xa4, 0x91, 0xc9, 0x44, 0xed, 0xa4, 0x9e, 0x9a, 0x08, 0xc1, 0x8a,
            0x54, 0x9d, 0x41, 0xe3, 0xa4, 0xe8, 0x65, 0x01, 0xe7, 0xdc, 0xff, 0x00, 0x6d, 0x8d,
            0x2f, 0x89, 0x5b, 0x50, 0xbe, 0xb9, 0x4a, 0x0d, 0x4c, 0x53, 0x51, 0x01, 0x8a, 0x31,
            0x9a, 0x92, 0x22, 0x5a, 0x49, 0xe7, 0xda, 0x37, 0xeb, 0x8c, 0xc5, 0xc7, 0x0a, 0xd5,
            0x87, 0x0a, 0x85, 0x30, 0xc7, 0xee, 0x69, 0x27, 0x40, 0x77, 0x3e, 0xbf, 0x18, 0x99,
            0xae, 0x1c, 0xb6, 0xc0, 0x0d, 0x02, 0xf9, 0x47, 0xb0, 0x81, 0x8f, 0xff, 0xd9,
        ];
        // Overwrite the SOF0 height (bytes 7-8) and width (bytes 9-10).
        jpeg[7] = 0x9b;
        jpeg[8] = 0x43;
        jpeg[9] = 0xee;
        jpeg[10] = 0x23;

        let error = decompress(Compression::Jpeg.to_code(), &jpeg, 0, None, 20 * 20).unwrap_err();
        assert!(error.to_string().contains("block budget"));
    }
}
444}