Skip to main content

gamut_ifd/
reader.rs

1//! Parsing of the TIFF byte-order header and the IFD chain.
2//!
3//! The structure is offset-driven — a classic parser-exploit surface — so every access is
4//! bounds-checked, the IFD chain is guarded against loops and runaway length, and unknown field
5//! types are skipped rather than trusted.
6
7use gamut_core::{Error, Result};
8
9use crate::{ByteOrder, FieldType, Ifd, Value, Variant};
10
11/// A parsed TIFF/IFD stream: its byte order, container variant, and the chain of Image File
12/// Directories.
13#[derive(Debug, Clone, PartialEq)]
14pub struct TiffFile {
15    /// The byte order the stream was written in.
16    pub order: ByteOrder,
17    /// Whether the stream is classic TIFF or BigTIFF (which sizes its offsets/counts).
18    pub variant: Variant,
19    /// The Image File Directories, in stream order (one per subfile/page).
20    pub ifds: Vec<Ifd>,
21}
22
/// Hard cap on how many IFDs the reader will follow (65,536), so a malformed or looping chain
/// cannot make parsing run without bound.
const MAX_IFDS: usize = 0x1_0000;
25
26/// Reads a 16-bit value at `pos` in `order`, bounds-checked.
27fn u16_at(data: &[u8], pos: usize, order: ByteOrder) -> Result<u16> {
28    let b = data
29        .get(pos..pos + 2)
30        .ok_or(Error::InvalidInput("TIFF: truncated 16-bit field"))?;
31    Ok(order.u16([b[0], b[1]]))
32}
33
34/// Reads a 32-bit value at `pos` in `order`, bounds-checked.
35fn u32_at(data: &[u8], pos: usize, order: ByteOrder) -> Result<u32> {
36    let b = data
37        .get(pos..pos + 4)
38        .ok_or(Error::InvalidInput("TIFF: truncated 32-bit field"))?;
39    Ok(order.u32([b[0], b[1], b[2], b[3]]))
40}
41
42/// Reads a 64-bit value at `pos` in `order`, bounds-checked (BigTIFF offsets/counts).
43#[cfg(feature = "bigtiff")]
44fn u64_at(data: &[u8], pos: usize, order: ByteOrder) -> Result<u64> {
45    let b = data
46        .get(pos..pos + 8)
47        .ok_or(Error::InvalidInput("TIFF: truncated 64-bit field"))?;
48    Ok(order.u64([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]))
49}
50
51/// Reads an offset-sized field at `pos` (a `u32` in classic TIFF, a `u64` in BigTIFF) as `u64`.
52///
53/// Used for every file offset and for the per-field value count, which share the offset width.
54fn offset_at(data: &[u8], pos: usize, order: ByteOrder, variant: Variant) -> Result<u64> {
55    match variant {
56        Variant::Classic => Ok(u64::from(u32_at(data, pos, order)?)),
57        #[cfg(feature = "bigtiff")]
58        Variant::Big => u64_at(data, pos, order),
59    }
60}
61
62/// Parses the image file header, returning the byte order, the container variant, and the offset
63/// of the first IFD. The header is 8 bytes for classic TIFF and 16 bytes for BigTIFF.
64///
65/// Without the `bigtiff` feature a BigTIFF magic number (`43`) is rejected as an unknown magic.
66///
67/// # Errors
68///
69/// Returns [`Error::InvalidInput`] if the byte-order mark, magic number, or (for BigTIFF) the
70/// fixed offset-size / reserved fields are not valid.
71pub fn read_header(data: &[u8]) -> Result<(ByteOrder, Variant, u64)> {
72    let head = data
73        .get(..8)
74        .ok_or(Error::InvalidInput("TIFF: header too short"))?;
75    let order = match [head[0], head[1]] {
76        [0x49, 0x49] => ByteOrder::LittleEndian,
77        [0x4D, 0x4D] => ByteOrder::BigEndian,
78        _ => return Err(Error::InvalidInput("TIFF: bad byte-order mark")),
79    };
80    match order.u16([head[2], head[3]]) {
81        42 => Ok((order, Variant::Classic, u64::from(u32_at(data, 4, order)?))),
82        #[cfg(feature = "bigtiff")]
83        43 => {
84            // BigTIFF: bytes 4-5 are the offset bytesize (always 8), bytes 6-7 are reserved (0),
85            // and the first-IFD offset is the 8-byte value at bytes 8-15.
86            if order.u16([head[4], head[5]]) != 8 {
87                return Err(Error::InvalidInput("TIFF: BigTIFF offset size must be 8"));
88            }
89            if order.u16([head[6], head[7]]) != 0 {
90                return Err(Error::InvalidInput(
91                    "TIFF: BigTIFF reserved field must be 0",
92                ));
93            }
94            Ok((order, Variant::Big, u64_at(data, 8, order)?))
95        }
96        _ => Err(Error::InvalidInput("TIFF: bad magic number")),
97    }
98}
99
100/// Reads the single IFD at `offset`, returning it and the offset of the next IFD (`0` if last).
101///
102/// The field widths follow `variant`: the entry count is 2 bytes (classic) or 8 (BigTIFF), each
103/// entry is 12 or 20 bytes, a value packs inline when it fits in the offset width (4 or 8), and
104/// the next-IFD pointer is 4 or 8 bytes.
105fn read_ifd(data: &[u8], offset: usize, order: ByteOrder, variant: Variant) -> Result<(Ifd, u64)> {
106    let entry_size = variant.entry_size();
107    let inline = variant.inline_threshold();
108    // The entry count is the only field whose width differs from the offset width (2 vs 8).
109    let count = match variant {
110        Variant::Classic => u64::from(u16_at(data, offset, order)?),
111        #[cfg(feature = "bigtiff")]
112        Variant::Big => u64_at(data, offset, order)?,
113    } as usize;
114    let entries_start = offset + variant.count_size();
115    let next_pos = entries_start + count * entry_size;
116    // Bound the directory to the file so a corrupt count fails fast rather than allocating.
117    if next_pos + variant.offset_size() > data.len() {
118        return Err(Error::InvalidInput("TIFF: IFD extends past end of file"));
119    }
120    let mut ifd = Ifd::new();
121    for i in 0..count {
122        let pos = entries_start + i * entry_size;
123        let tag = u16_at(data, pos, order)?;
124        let type_code = u16_at(data, pos + 2, order)?;
125        // The value count and the value/offset field both follow the offset width.
126        let value_count = offset_at(data, pos + 4, order, variant)? as usize;
127        let value_pos = pos + 4 + variant.offset_size();
128        // Per spec, readers skip fields with an unexpected (unknown) field type.
129        let Some(ty) = FieldType::from_code(type_code) else {
130            continue;
131        };
132        let byte_len = value_count
133            .checked_mul(ty.size())
134            .ok_or(Error::InvalidInput("TIFF: field length overflow"))?;
135        let value = if byte_len <= inline {
136            Value::decode(ty, value_count, &data[value_pos..value_pos + inline], order)?
137        } else {
138            let voff = offset_at(data, value_pos, order, variant)? as usize;
139            let bytes = data
140                .get(voff..)
141                .ok_or(Error::InvalidInput("TIFF: value offset out of bounds"))?;
142            Value::decode(ty, value_count, bytes, order)?
143        };
144        // A duplicate tag keeps the last occurrence; `set` maintains sort order.
145        ifd.set(tag, value);
146    }
147    let next = offset_at(data, next_pos, order, variant)?;
148    Ok((ifd, next))
149}
150
151/// Parses a TIFF/IFD stream: the header followed by the whole IFD chain.
152///
153/// # Errors
154///
155/// Returns [`Error::InvalidInput`] if the header is invalid, an offset is out of bounds, the IFD
156/// chain loops, or a field value is truncated.
157pub fn read(data: &[u8]) -> Result<TiffFile> {
158    let (order, variant, first) = read_header(data)?;
159    let mut ifds = Vec::new();
160    let mut offset = first as usize;
161    let mut seen = Vec::new();
162    while offset != 0 {
163        if seen.contains(&offset) {
164            return Err(Error::InvalidInput("TIFF: IFD chain loops"));
165        }
166        if ifds.len() >= MAX_IFDS {
167            return Err(Error::InvalidInput("TIFF: too many IFDs"));
168        }
169        seen.push(offset);
170        let (ifd, next) = read_ifd(data, offset, order, variant)?;
171        ifds.push(ifd);
172        offset = next as usize;
173    }
174    if ifds.is_empty() {
175        return Err(Error::InvalidInput("TIFF: file has no IFD"));
176    }
177    Ok(TiffFile {
178        order,
179        variant,
180        ifds,
181    })
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Malformed classic headers are rejected; a well-formed one yields its three components.
    #[test]
    fn rejects_bad_header() {
        let malformed: [&[u8]; 3] = [
            // Too short to hold a header.
            b"\x49\x49",
            // Byte-order mark is neither "II" nor "MM".
            b"XX\x2a\x00\x08\x00\x00\x00",
            // Magic number is neither 42 nor 43.
            b"II\x00\x00\x08\x00\x00\x00",
        ];
        for bytes in malformed {
            assert!(read_header(bytes).is_err());
        }

        let parsed = read_header(b"II\x2a\x00\x08\x00\x00\x00").expect("ok");
        assert_eq!(parsed, (ByteOrder::LittleEndian, Variant::Classic, 8));
    }

    /// With the feature enabled, a BigTIFF header parses and its fixed fields are enforced.
    #[cfg(feature = "bigtiff")]
    #[test]
    fn parses_bigtiff_header() {
        // II, magic 43, offset-size 8, reserved 0, then an 8-byte first-IFD offset of 16.
        let parsed =
            read_header(b"II\x2b\x00\x08\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00").expect("ok");
        assert_eq!(parsed, (ByteOrder::LittleEndian, Variant::Big, 16));

        let rejected: [&[u8]; 3] = [
            // The fixed BigTIFF offset-size field must be 8 (here it is 4).
            b"II\x2b\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00",
            // The reserved field must be 0 (here it is 1).
            b"II\x2b\x00\x08\x00\x01\x00\x10\x00\x00\x00\x00\x00\x00\x00",
            // A BigTIFF magic with a truncated (classic-length) header is rejected, not read OOB.
            b"II\x2b\x00\x08\x00\x00\x00",
        ];
        for bytes in rejected {
            assert!(read_header(bytes).is_err());
        }
    }

    /// Without the feature, a BigTIFF magic is an unknown magic, not a mis-parse.
    #[cfg(not(feature = "bigtiff"))]
    #[test]
    fn rejects_bigtiff_without_feature() {
        let outcome = read_header(b"II\x2b\x00\x08\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00");
        assert!(outcome.is_err());
    }

    /// An empty byte slice has no header and must fail cleanly.
    #[test]
    fn empty_input_errors() {
        let empty: &[u8] = &[];
        assert!(read(empty).is_err());
    }
}
231}