Skip to main content

oxideav_webp/
container.rs

1//! RIFF/WEBP container walker per RFC 9649 (WebP Image Format).
2//!
3//! This module covers only the **structural** layer of WebP:
4//!
5//! * §2.3 — Generic RIFF chunk: 4-byte FourCC, 4-byte little-endian
6//!   uint32 size, payload, and (if size is odd) a single 0 padding
7//!   byte that is not counted in the size field.
8//! * §2.4 — WebP file header: 12 bytes total — the ASCII tag `RIFF`,
9//!   a 32-bit little-endian *File Size* counting everything after
10//!   offset 8, then the ASCII tag `WEBP`.
11//! * §2.5 / §2.6 / §2.7 — the layouts that follow: simple lossy
12//!   (`VP8 `), simple lossless (`VP8L`), and extended (`VP8X` plus
13//!   any of `ICCP`, `ANIM`, `ANMF`, `ALPH`, `VP8 `/`VP8L`, `EXIF`,
14//!   `XMP `, plus unknown chunks per §2.7.1.6).
15//!
16//! Decoding of `VP8 ` / `VP8L` / `ALPH` payloads is **out of scope**
17//! for this layer; the walker only records FourCC + payload range.
18
19use core::fmt;
20
/// A chunk tag exactly as it is stored on disk: four raw bytes.
///
/// Tags with fewer than four letters keep their trailing space, so
/// `"VP8 "` and `"XMP "` both end in `0x20`.
pub type FourCc = [u8; 4];

/// Named constants for the FourCC tags that RFC 9649 §2.4–§2.7 refer to.
pub mod fourcc {
    use super::FourCc;

    // ---- §2.4 file header ----

    /// `"RIFF"` — the tag that opens the §2.4 WebP file header.
    pub const RIFF: FourCc = [b'R', b'I', b'F', b'F'];
    /// `"WEBP"` — the form type that closes the §2.4 WebP file header.
    pub const WEBP: FourCc = [b'W', b'E', b'B', b'P'];

    // ---- bitstream and extended-format chunks ----

    /// `"VP8 "` — lossy bitstream (§2.5 / §2.7.1.3). The fourth byte is a
    /// space (0x20).
    pub const VP8: FourCc = [b'V', b'P', b'8', b' '];
    /// `"VP8L"` — lossless bitstream (§2.6 / §2.7.1.3).
    pub const VP8L: FourCc = [b'V', b'P', b'8', b'L'];
    /// `"VP8X"` — §2.7 extended-format flags and canvas dimensions.
    pub const VP8X: FourCc = [b'V', b'P', b'8', b'X'];
    /// `"ALPH"` — alpha plane accompanying a `VP8 ` chunk (§2.7.1.2).
    pub const ALPH: FourCc = [b'A', b'L', b'P', b'H'];
    /// `"ANIM"` — animation control chunk (§2.7.1.1).
    pub const ANIM: FourCc = [b'A', b'N', b'I', b'M'];
    /// `"ANMF"` — per-frame animation chunk (§2.7.1.1).
    pub const ANMF: FourCc = [b'A', b'N', b'M', b'F'];

    // ---- colour profile and metadata chunks ----

    /// `"ICCP"` — ICC color profile (§2.7.1.4).
    pub const ICCP: FourCc = [b'I', b'C', b'C', b'P'];
    /// `"EXIF"` — Exif metadata (§2.7.1.5).
    pub const EXIF: FourCc = [b'E', b'X', b'I', b'F'];
    /// `"XMP "` — XMP metadata (§2.7.1.5). The fourth byte is a space
    /// (0x20).
    pub const XMP: FourCc = [b'X', b'M', b'P', b' '];
}
53
54/// Errors raised by the RIFF/WEBP walker. The walker reports the
55/// *first* structural problem it sees and stops — it is not a
56/// recovery layer.
57#[derive(Debug, Clone, PartialEq, Eq)]
58pub enum ContainerError {
59    /// The buffer is shorter than the 12-byte §2.4 file header.
60    TooShortForHeader { got: usize },
61    /// Bytes 0..4 are not the ASCII tag `RIFF`.
62    NotRiff { got: FourCc },
63    /// Bytes 8..12 are not the ASCII tag `WEBP`.
64    NotWebp { got: FourCc },
65    /// The §2.4 `File Size` field says the payload extends past the
66    /// end of the buffer. The header `File Size` counts the
67    /// `WEBP` FourCC plus everything after it.
68    RiffSizeOverflowsBuffer {
69        /// `File Size` value as parsed from bytes 4..8.
70        declared: u32,
71        /// Total buffer length the walker was given.
72        buffer_len: usize,
73    },
74    /// A chunk header was truncated — the 8 bytes required to read
75    /// `FourCC + Size` do not fit in what remains of the RIFF
76    /// payload at `offset`.
77    TruncatedChunkHeader {
78        /// Absolute offset (from the start of the buffer) where the
79        /// truncated header begins.
80        offset: usize,
81    },
82    /// A chunk's declared `Size` value runs past the end of the RIFF
83    /// payload — i.e. payload `Size` bytes would extend beyond the
84    /// region delimited by the outer §2.4 file header.
85    ChunkPayloadOverflowsRiff {
86        /// Absolute offset where the offending chunk's header starts.
87        offset: usize,
88        /// `Size` value as parsed from the chunk header.
89        declared: u32,
90        /// Number of payload bytes the walker actually had room for.
91        available: usize,
92    },
93    /// A chunk's declared `Size` value is odd, and the §2.3 padding
94    /// byte that would follow it is missing because there are no
95    /// further bytes in the RIFF payload.
96    MissingPadByte {
97        /// Absolute offset of the chunk header whose payload ends
98        /// without its required pad byte.
99        offset: usize,
100    },
101}
102
103impl fmt::Display for ContainerError {
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        match self {
106            Self::TooShortForHeader { got } => write!(
107                f,
108                "WebP buffer too short for §2.4 file header (12 bytes), got {got}"
109            ),
110            Self::NotRiff { got } => write!(
111                f,
112                "WebP buffer does not start with §2.4 'RIFF' tag (got {:02x?})",
113                got
114            ),
115            Self::NotWebp { got } => write!(
116                f,
117                "WebP buffer is RIFF but not 'WEBP' form (got {:02x?})",
118                got
119            ),
120            Self::RiffSizeOverflowsBuffer {
121                declared,
122                buffer_len,
123            } => write!(
124                f,
125                "§2.4 RIFF File Size {declared} overflows buffer length {buffer_len}"
126            ),
127            Self::TruncatedChunkHeader { offset } => write!(
128                f,
129                "§2.3 chunk header at offset {offset} is truncated (need 8 bytes)"
130            ),
131            Self::ChunkPayloadOverflowsRiff {
132                offset,
133                declared,
134                available,
135            } => write!(
136                f,
137                "§2.3 chunk at offset {offset} declares Size {declared} \
138                 but only {available} bytes remain in the RIFF payload"
139            ),
140            Self::MissingPadByte { offset } => write!(
141                f,
142                "§2.3 chunk at offset {offset} has odd Size but no trailing pad byte"
143            ),
144        }
145    }
146}
147
148impl std::error::Error for ContainerError {}
149
150/// One §2.3 RIFF chunk inside a WebP file.
151///
152/// The walker records the FourCC, the declared payload size, and the
153/// absolute `(start, end)` byte range of the payload inside the input
154/// buffer. Borrowing the payload bytes is left to the caller to keep
155/// this struct cheap to move and copy.
156#[derive(Debug, Clone, Copy, PartialEq, Eq)]
157pub struct WebpChunk {
158    /// Four-byte FourCC tag.
159    pub fourcc: FourCc,
160    /// `Size` field as declared in the §2.3 chunk header.
161    pub size: u32,
162    /// Absolute offset (from buffer start) of the first payload byte.
163    pub payload_start: usize,
164    /// Absolute offset (from buffer start) of one past the last
165    /// payload byte. `payload_end - payload_start == size as usize`.
166    pub payload_end: usize,
167}
168
169impl WebpChunk {
170    /// Borrow the payload bytes out of the original input slice.
171    pub fn payload<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
172        &buf[self.payload_start..self.payload_end]
173    }
174
175    /// True if the FourCC is `"VP8 "` (note the trailing space).
176    pub fn is_vp8_lossy(&self) -> bool {
177        self.fourcc == fourcc::VP8
178    }
179
180    /// True if the FourCC is `"VP8L"`.
181    pub fn is_vp8_lossless(&self) -> bool {
182        self.fourcc == fourcc::VP8L
183    }
184
185    /// True if the FourCC is `"VP8X"`.
186    pub fn is_extended(&self) -> bool {
187        self.fourcc == fourcc::VP8X
188    }
189}
190
191/// Output of the §2.3–§2.7 RIFF walk.
192///
193/// Holds the §2.4 declared `File Size` plus the ordered list of
194/// chunks discovered inside the RIFF payload. The walker does not
195/// re-order chunks — they appear in the order on disk so that
196/// downstream code can apply §2.7's ordering rules.
197#[derive(Debug, Clone, PartialEq, Eq)]
198pub struct WebpContainer {
199    /// `File Size` field from bytes 4..8 of the §2.4 file header.
200    pub riff_file_size: u32,
201    /// Chunks parsed from the §2.4 file header's payload, in the
202    /// order they appear on disk.
203    pub chunks: Vec<WebpChunk>,
204}
205
206impl WebpContainer {
207    /// Iterate over chunks matching a given FourCC.
208    pub fn chunks_with_fourcc(&self, fourcc: FourCc) -> impl Iterator<Item = &WebpChunk> + '_ {
209        self.chunks.iter().filter(move |c| c.fourcc == fourcc)
210    }
211
212    /// Find the first chunk matching a given FourCC, if any.
213    pub fn first_chunk_with_fourcc(&self, fourcc: FourCc) -> Option<&WebpChunk> {
214        self.chunks.iter().find(|c| c.fourcc == fourcc)
215    }
216
217    /// True if the container starts with `VP8X`, indicating §2.7
218    /// extended layout. (The §2.7 ordering rule places `VP8X` first
219    /// among the structural chunks when present.)
220    pub fn is_extended(&self) -> bool {
221        self.chunks
222            .first()
223            .map(|c| c.is_extended())
224            .unwrap_or(false)
225    }
226}
227
228/// Walk a `RIFF/WEBP` container per RFC 9649 §2.3–§2.7 and return
229/// the list of chunks discovered.
230///
231/// The walker enforces structural invariants only:
232///
233/// * `buf` is at least 12 bytes.
234/// * Bytes `0..4` are `"RIFF"` and bytes `8..12` are `"WEBP"`.
235/// * The §2.4 `File Size` field does not overflow `buf`.
236/// * Each subsequent chunk header is 8 bytes; the declared `Size`
237///   fits inside the remaining RIFF payload; and if `Size` is odd
238///   the required §2.3 pad byte is present.
239///
240/// Per-chunk *content* validation (e.g. the VP8X reserved bits,
241/// VP8 frame width/height, animation counts) is the responsibility
242/// of layers above this walker.
243pub fn parse(buf: &[u8]) -> Result<WebpContainer, ContainerError> {
244    // §2.4 file header — 12 bytes.
245    if buf.len() < 12 {
246        return Err(ContainerError::TooShortForHeader { got: buf.len() });
247    }
248    let riff_tag: FourCc = buf[0..4]
249        .try_into()
250        .expect("12-byte slice always has 4 bytes at offset 0");
251    if riff_tag != fourcc::RIFF {
252        return Err(ContainerError::NotRiff { got: riff_tag });
253    }
254    let riff_file_size = u32::from_le_bytes(
255        buf[4..8]
256            .try_into()
257            .expect("12-byte slice always has 4 bytes at offset 4"),
258    );
259    let webp_tag: FourCc = buf[8..12]
260        .try_into()
261        .expect("12-byte slice always has 4 bytes at offset 8");
262    if webp_tag != fourcc::WEBP {
263        return Err(ContainerError::NotWebp { got: webp_tag });
264    }
265
266    // §2.4: "The file size in the header is the total size of the
267    // chunks that follow plus 4 bytes for the 'WEBP' FourCC."
268    //
269    // So the RIFF payload (the bytes after the 8-byte 'RIFF' +
270    // File Size header) is exactly `riff_file_size` bytes long, of
271    // which the first 4 are the 'WEBP' FourCC and the remainder are
272    // the chunk stream. The walker tolerates trailing data beyond
273    // `riff_file_size` per §2.4 ("Readers MAY parse such files,
274    // ignoring the trailing data") but it never *reads* past that
275    // declared limit when walking chunks.
276    let declared_payload_end = 8usize.saturating_add(riff_file_size as usize);
277    if declared_payload_end > buf.len() {
278        return Err(ContainerError::RiffSizeOverflowsBuffer {
279            declared: riff_file_size,
280            buffer_len: buf.len(),
281        });
282    }
283    let chunk_stream_end = declared_payload_end;
284
285    let mut chunks: Vec<WebpChunk> = Vec::new();
286    let mut cursor: usize = 12; // first byte after the §2.4 header
287    while cursor < chunk_stream_end {
288        // Need 8 bytes for FourCC + Size.
289        if chunk_stream_end - cursor < 8 {
290            return Err(ContainerError::TruncatedChunkHeader { offset: cursor });
291        }
292        let fourcc: FourCc = buf[cursor..cursor + 4]
293            .try_into()
294            .expect("bounds checked above");
295        let size = u32::from_le_bytes(
296            buf[cursor + 4..cursor + 8]
297                .try_into()
298                .expect("bounds checked above"),
299        );
300
301        let payload_start = cursor + 8;
302        let payload_avail = chunk_stream_end - payload_start;
303        if (size as usize) > payload_avail {
304            return Err(ContainerError::ChunkPayloadOverflowsRiff {
305                offset: cursor,
306                declared: size,
307                available: payload_avail,
308            });
309        }
310        let payload_end = payload_start + size as usize;
311
312        chunks.push(WebpChunk {
313            fourcc,
314            size,
315            payload_start,
316            payload_end,
317        });
318
319        // §2.3 padding: if Size is odd, a single 0 byte follows that
320        // is *not* counted in Size. The walker requires that byte to
321        // be present (but does not check its value — §2.3 says it
322        // MUST be 0; that's a writer constraint, not a reader
323        // refusal mode).
324        let needs_pad = (size & 1) == 1;
325        let total = if needs_pad {
326            (size as usize).checked_add(1)
327        } else {
328            Some(size as usize)
329        }
330        .expect("size+1 cannot overflow because size <= payload_avail < usize::MAX");
331        let after_chunk =
332            payload_start
333                .checked_add(total)
334                .ok_or(ContainerError::ChunkPayloadOverflowsRiff {
335                    offset: cursor,
336                    declared: size,
337                    available: payload_avail,
338                })?;
339        if after_chunk > chunk_stream_end {
340            return Err(ContainerError::MissingPadByte { offset: cursor });
341        }
342        cursor = after_chunk;
343    }
344
345    Ok(WebpContainer {
346        riff_file_size,
347        chunks,
348    })
349}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a §2.3 chunk: FourCC, little-endian `Size`, the payload,
    /// and — when the payload length is odd — one zero pad byte.
    fn chunk(fourcc: &FourCc, payload: &[u8]) -> Vec<u8> {
        let size = u32::try_from(payload.len()).expect("test payload fits in a u32 Size field");
        let mut v = Vec::with_capacity(8 + payload.len() + 1);
        v.extend_from_slice(fourcc);
        v.extend_from_slice(&size.to_le_bytes());
        v.extend_from_slice(payload);
        if payload.len() % 2 == 1 {
            v.push(0);
        }
        v
    }

    /// Wrap already-serialised chunk bytes in a §2.4 WebP file header
    /// whose `File Size` is `4 + chunks.len()`.
    fn webp(chunks: &[u8]) -> Vec<u8> {
        let file_size =
            4 + u32::try_from(chunks.len()).expect("test chunk stream fits in a u32 File Size");
        let mut v = Vec::with_capacity(12 + chunks.len());
        v.extend_from_slice(b"RIFF");
        v.extend_from_slice(&file_size.to_le_bytes());
        v.extend_from_slice(b"WEBP");
        v.extend_from_slice(chunks);
        v
    }

    #[test]
    fn simple_lossy_walks_to_one_vp8_chunk() {
        // §2.5: WebP file header + a single 'VP8 ' chunk with a
        // 7-byte payload (odd, exercises the §2.3 pad byte).
        let body = chunk(&fourcc::VP8, &[0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03]);
        let buf = webp(&body);
        let c = parse(&buf).expect("simple lossy parses");
        assert_eq!(c.riff_file_size, 4 + body.len() as u32);
        assert_eq!(c.chunks.len(), 1);
        let only = &c.chunks[0];
        assert!(only.is_vp8_lossy());
        assert_eq!(only.size, 7);
        assert_eq!(
            only.payload(&buf),
            &[0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03]
        );
        assert!(!c.is_extended());
    }

    #[test]
    fn simple_lossless_walks_to_one_vp8l_chunk() {
        // §2.6: WebP file header + a single 'VP8L' chunk with a
        // 4-byte payload (even, no §2.3 pad byte).
        let body = chunk(&fourcc::VP8L, &[0x2F, 0x00, 0x00, 0x00]);
        let buf = webp(&body);
        let c = parse(&buf).expect("simple lossless parses");
        assert_eq!(c.chunks.len(), 1);
        let only = &c.chunks[0];
        assert!(only.is_vp8_lossless());
        assert_eq!(only.size, 4);
        assert_eq!(only.payload(&buf), &[0x2F, 0x00, 0x00, 0x00]);
    }

    #[test]
    fn extended_layout_walks_all_chunks_in_order() {
        // §2.7 example: VP8X + ICCP + ANIM + ANMF (+ inner VP8 ) +
        // EXIF + XMP . The walker should record them in the order
        // they appear on disk and surface every FourCC.
        let vp8x_payload = vec![
            0x10, 0x00, 0x00, 0x00, // Rsv|I|L|E|X|A|R + 24 bits reserved
            0x07, 0x00, 0x00, // Canvas Width Minus One = 7  (width 8)
            0x07, 0x00, 0x00, // Canvas Height Minus One = 7 (height 8)
        ];
        let mut body = Vec::new();
        body.extend(chunk(&fourcc::VP8X, &vp8x_payload));
        body.extend(chunk(&fourcc::ICCP, &[0xAA; 5])); // odd payload exercises pad
        body.extend(chunk(&fourcc::ANIM, &[0; 6]));
        body.extend(chunk(&fourcc::ANMF, &[0; 9])); // odd payload
        body.extend(chunk(&fourcc::VP8, &[0; 8]));
        body.extend(chunk(&fourcc::EXIF, b"Exif\x00\x00MM*\x00"));
        body.extend(chunk(&fourcc::XMP, b"<?xpacket?>"));
        let buf = webp(&body);

        let c = parse(&buf).expect("extended layout parses");
        let order: Vec<FourCc> = c.chunks.iter().map(|c| c.fourcc).collect();
        assert_eq!(
            order,
            vec![
                fourcc::VP8X,
                fourcc::ICCP,
                fourcc::ANIM,
                fourcc::ANMF,
                fourcc::VP8,
                fourcc::EXIF,
                fourcc::XMP,
            ]
        );
        assert!(c.is_extended());
        assert_eq!(c.first_chunk_with_fourcc(fourcc::ICCP).unwrap().size, 5);
        assert_eq!(c.chunks_with_fourcc(fourcc::VP8).count(), 1);

        // Spot-check the ICCP payload survived the §2.3 pad byte.
        let iccp = c.first_chunk_with_fourcc(fourcc::ICCP).unwrap();
        assert_eq!(iccp.payload(&buf), &[0xAA, 0xAA, 0xAA, 0xAA, 0xAA]);
    }

    #[test]
    fn header_only_file_has_no_chunks() {
        // File Size = 4 covers just the 'WEBP' FourCC, so the chunk
        // stream is empty.
        let buf = webp(&[]);
        let c = parse(&buf).expect("header-only file parses");
        assert_eq!(c.riff_file_size, 4);
        assert!(c.chunks.is_empty());
        assert!(!c.is_extended());
    }

    #[test]
    fn tolerates_trailing_data_after_declared_riff_payload() {
        // §2.4 allows readers to ignore bytes that follow the region
        // covered by File Size; the walker must not treat them as
        // chunks or as an error.
        let body = chunk(&fourcc::VP8L, &[0x2F, 0x00, 0x00, 0x00]);
        let mut buf = webp(&body);
        buf.extend_from_slice(b"trailing junk");

        let c = parse(&buf).expect("trailing data is ignored");
        assert_eq!(c.riff_file_size as usize, 4 + body.len());
        assert_eq!(c.chunks.len(), 1);
        assert_eq!(c.chunks[0].payload_end, 8 + c.riff_file_size as usize);
        assert_eq!(c.chunks[0].payload(&buf), &[0x2F, 0x00, 0x00, 0x00]);
    }

    #[test]
    fn rejects_buffer_shorter_than_file_header() {
        // §2.4 requires 12 bytes; supply only 11.
        let buf = b"RIFF\x00\x00\x00\x00WEB";
        assert_eq!(
            parse(buf),
            Err(ContainerError::TooShortForHeader { got: 11 })
        );
    }

    #[test]
    fn rejects_wrong_riff_or_form_tag() {
        // First the 'RIFF' tag itself is wrong.
        let mut buf = b"riff\x04\x00\x00\x00WEBP".to_vec();
        match parse(&buf) {
            Err(ContainerError::NotRiff { got }) => assert_eq!(&got, b"riff"),
            other => panic!("expected NotRiff, got {other:?}"),
        }

        // Now 'RIFF' but a non-'WEBP' form type — §2.4 demands WEBP.
        buf[0..4].copy_from_slice(b"RIFF");
        buf[8..12].copy_from_slice(b"AVI ");
        match parse(&buf) {
            Err(ContainerError::NotWebp { got }) => assert_eq!(&got, b"AVI "),
            other => panic!("expected NotWebp, got {other:?}"),
        }
    }

    #[test]
    fn rejects_truncated_chunk_header() {
        // Only 5 of the 8 header bytes (FourCC + Size) fit inside the
        // declared RIFF payload.
        let buf = webp(b"VP8 \x00");
        assert_eq!(
            parse(&buf),
            Err(ContainerError::TruncatedChunkHeader { offset: 12 })
        );
    }

    #[test]
    fn rejects_chunk_whose_size_overflows_riff_payload() {
        // A 'VP8 ' header that claims Size = 100 in a RIFF whose
        // payload only has 8 + 0 bytes of room for the chunk.
        let mut bad = Vec::new();
        bad.extend_from_slice(b"VP8 ");
        bad.extend_from_slice(&100u32.to_le_bytes()); // declared size 100

        // Wrap in a §2.4 header that says File Size = 4 (just WEBP)
        // + 8 (the bad chunk header). The chunk's declared 100-byte
        // payload doesn't fit in the 0 remaining bytes.
        let buf = webp(&bad);
        match parse(&buf) {
            Err(ContainerError::ChunkPayloadOverflowsRiff {
                offset,
                declared,
                available,
            }) => {
                assert_eq!(offset, 12);
                assert_eq!(declared, 100);
                assert_eq!(available, 0);
            }
            other => panic!("expected ChunkPayloadOverflowsRiff, got {other:?}"),
        }
    }

    #[test]
    fn rejects_odd_chunk_missing_pad_byte() {
        // Hand-craft a RIFF whose declared File Size accounts for an
        // odd-length chunk **without** including its §2.3 pad byte.
        // The walker should refuse rather than read past the end of
        // the declared payload.
        let mut chunk_bytes = Vec::new();
        chunk_bytes.extend_from_slice(b"ICCP");
        chunk_bytes.extend_from_slice(&3u32.to_le_bytes()); // odd size
        chunk_bytes.extend_from_slice(&[0xDE, 0xAD, 0xBE]); // 3 payload bytes, NO pad

        // File Size = 4 ('WEBP') + len(chunk_bytes); deliberately
        // no extra trailing pad byte beyond what we wrote.
        let mut buf = Vec::new();
        buf.extend_from_slice(b"RIFF");
        buf.extend_from_slice(&(4u32 + chunk_bytes.len() as u32).to_le_bytes());
        buf.extend_from_slice(b"WEBP");
        buf.extend_from_slice(&chunk_bytes);

        match parse(&buf) {
            Err(ContainerError::MissingPadByte { offset }) => assert_eq!(offset, 12),
            other => panic!("expected MissingPadByte, got {other:?}"),
        }
    }

    #[test]
    fn rejects_riff_size_that_runs_past_buffer() {
        // Header says File Size = 1000 but we only supply the
        // 12-byte header itself.
        let mut buf = b"RIFF".to_vec();
        buf.extend_from_slice(&1000u32.to_le_bytes());
        buf.extend_from_slice(b"WEBP");
        match parse(&buf) {
            Err(ContainerError::RiffSizeOverflowsBuffer {
                declared,
                buffer_len,
            }) => {
                assert_eq!(declared, 1000);
                assert_eq!(buffer_len, 12);
            }
            other => panic!("expected RiffSizeOverflowsBuffer, got {other:?}"),
        }
    }
}
551}