oxideav_webp/container.rs
1//! RIFF/WEBP container walker per RFC 9649 (WebP Image Format).
2//!
3//! This module covers only the **structural** layer of WebP:
4//!
5//! * §2.3 — Generic RIFF chunk: 4-byte FourCC, 4-byte little-endian
6//! uint32 size, payload, and (if size is odd) a single 0 padding
7//! byte that is not counted in the size field.
8//! * §2.4 — WebP file header: 12 bytes total — the ASCII tag `RIFF`,
9//! a 32-bit little-endian *File Size* counting everything after
10//! offset 8, then the ASCII tag `WEBP`.
11//! * §2.5 / §2.6 / §2.7 — the layouts that follow: simple lossy
12//! (`VP8 `), simple lossless (`VP8L`), and extended (`VP8X` plus
13//! any of `ICCP`, `ANIM`, `ANMF`, `ALPH`, `VP8 `/`VP8L`, `EXIF`,
14//! `XMP `, plus unknown chunks per §2.7.1.6).
15//!
16//! Decoding of `VP8 ` / `VP8L` / `ALPH` payloads is **out of scope**
17//! for this layer; the walker only records FourCC + payload range.
18
19use core::fmt;
20
/// A RIFF FourCC exactly as it is stored in the file: four raw bytes.
///
/// Tags shorter than four characters keep their trailing 0x20 space,
/// so `"VP8 "` and `"XMP "` compare equal to bytes copied straight
/// out of the input buffer.
pub type FourCc = [u8; 4];

/// Named constants for the FourCC tags RFC 9649 §2.4–§2.7 mentions.
pub mod fourcc {
    use super::FourCc;

    // ---- §2.4 file header ------------------------------------------

    /// `"RIFF"` — the tag that opens the §2.4 WebP file header.
    pub const RIFF: FourCc = *b"RIFF";
    /// `"WEBP"` — the form type that closes the §2.4 WebP file header.
    pub const WEBP: FourCc = *b"WEBP";

    // ---- image bitstreams ------------------------------------------

    /// `"VP8 "` — lossy bitstream (§2.5 / §2.7.1.3). The fourth byte
    /// is an ASCII space.
    pub const VP8: FourCc = *b"VP8 ";
    /// `"VP8L"` — lossless bitstream (§2.6 / §2.7.1.3).
    pub const VP8L: FourCc = *b"VP8L";

    // ---- §2.7 extended format --------------------------------------

    /// `"VP8X"` — extended-format flags and canvas dimensions (§2.7).
    pub const VP8X: FourCc = *b"VP8X";
    /// `"ALPH"` — alpha plane that accompanies a `VP8 ` chunk (§2.7.1.2).
    pub const ALPH: FourCc = *b"ALPH";
    /// `"ANIM"` — animation control chunk (§2.7.1.1).
    pub const ANIM: FourCc = *b"ANIM";
    /// `"ANMF"` — per-frame animation chunk (§2.7.1.1).
    pub const ANMF: FourCc = *b"ANMF";
    /// `"ICCP"` — ICC color profile (§2.7.1.4).
    pub const ICCP: FourCc = *b"ICCP";

    // ---- §2.7.1.5 metadata -----------------------------------------

    /// `"EXIF"` — Exif metadata (§2.7.1.5).
    pub const EXIF: FourCc = *b"EXIF";
    /// `"XMP "` — XMP metadata (§2.7.1.5). The fourth byte is an ASCII
    /// space.
    pub const XMP: FourCc = *b"XMP ";
}
53
54/// Errors raised by the RIFF/WEBP walker. The walker reports the
55/// *first* structural problem it sees and stops — it is not a
56/// recovery layer.
57#[derive(Debug, Clone, PartialEq, Eq)]
58pub enum ContainerError {
59 /// The buffer is shorter than the 12-byte §2.4 file header.
60 TooShortForHeader { got: usize },
61 /// Bytes 0..4 are not the ASCII tag `RIFF`.
62 NotRiff { got: FourCc },
63 /// Bytes 8..12 are not the ASCII tag `WEBP`.
64 NotWebp { got: FourCc },
65 /// The §2.4 `File Size` field says the payload extends past the
66 /// end of the buffer. The header `File Size` counts the
67 /// `WEBP` FourCC plus everything after it.
68 RiffSizeOverflowsBuffer {
69 /// `File Size` value as parsed from bytes 4..8.
70 declared: u32,
71 /// Total buffer length the walker was given.
72 buffer_len: usize,
73 },
74 /// A chunk header was truncated — the 8 bytes required to read
75 /// `FourCC + Size` do not fit in what remains of the RIFF
76 /// payload at `offset`.
77 TruncatedChunkHeader {
78 /// Absolute offset (from the start of the buffer) where the
79 /// truncated header begins.
80 offset: usize,
81 },
82 /// A chunk's declared `Size` value runs past the end of the RIFF
83 /// payload — i.e. payload `Size` bytes would extend beyond the
84 /// region delimited by the outer §2.4 file header.
85 ChunkPayloadOverflowsRiff {
86 /// Absolute offset where the offending chunk's header starts.
87 offset: usize,
88 /// `Size` value as parsed from the chunk header.
89 declared: u32,
90 /// Number of payload bytes the walker actually had room for.
91 available: usize,
92 },
93 /// A chunk's declared `Size` value is odd, and the §2.3 padding
94 /// byte that would follow it is missing because there are no
95 /// further bytes in the RIFF payload.
96 MissingPadByte {
97 /// Absolute offset of the chunk header whose payload ends
98 /// without its required pad byte.
99 offset: usize,
100 },
101}
102
103impl fmt::Display for ContainerError {
104 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105 match self {
106 Self::TooShortForHeader { got } => write!(
107 f,
108 "WebP buffer too short for §2.4 file header (12 bytes), got {got}"
109 ),
110 Self::NotRiff { got } => write!(
111 f,
112 "WebP buffer does not start with §2.4 'RIFF' tag (got {:02x?})",
113 got
114 ),
115 Self::NotWebp { got } => write!(
116 f,
117 "WebP buffer is RIFF but not 'WEBP' form (got {:02x?})",
118 got
119 ),
120 Self::RiffSizeOverflowsBuffer {
121 declared,
122 buffer_len,
123 } => write!(
124 f,
125 "§2.4 RIFF File Size {declared} overflows buffer length {buffer_len}"
126 ),
127 Self::TruncatedChunkHeader { offset } => write!(
128 f,
129 "§2.3 chunk header at offset {offset} is truncated (need 8 bytes)"
130 ),
131 Self::ChunkPayloadOverflowsRiff {
132 offset,
133 declared,
134 available,
135 } => write!(
136 f,
137 "§2.3 chunk at offset {offset} declares Size {declared} \
138 but only {available} bytes remain in the RIFF payload"
139 ),
140 Self::MissingPadByte { offset } => write!(
141 f,
142 "§2.3 chunk at offset {offset} has odd Size but no trailing pad byte"
143 ),
144 }
145 }
146}
147
148impl std::error::Error for ContainerError {}
149
150/// One §2.3 RIFF chunk inside a WebP file.
151///
152/// The walker records the FourCC, the declared payload size, and the
153/// absolute `(start, end)` byte range of the payload inside the input
154/// buffer. Borrowing the payload bytes is left to the caller to keep
155/// this struct cheap to move and copy.
156#[derive(Debug, Clone, Copy, PartialEq, Eq)]
157pub struct WebpChunk {
158 /// Four-byte FourCC tag.
159 pub fourcc: FourCc,
160 /// `Size` field as declared in the §2.3 chunk header.
161 pub size: u32,
162 /// Absolute offset (from buffer start) of the first payload byte.
163 pub payload_start: usize,
164 /// Absolute offset (from buffer start) of one past the last
165 /// payload byte. `payload_end - payload_start == size as usize`.
166 pub payload_end: usize,
167}
168
169impl WebpChunk {
170 /// Borrow the payload bytes out of the original input slice.
171 pub fn payload<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
172 &buf[self.payload_start..self.payload_end]
173 }
174
175 /// True if the FourCC is `"VP8 "` (note the trailing space).
176 pub fn is_vp8_lossy(&self) -> bool {
177 self.fourcc == fourcc::VP8
178 }
179
180 /// True if the FourCC is `"VP8L"`.
181 pub fn is_vp8_lossless(&self) -> bool {
182 self.fourcc == fourcc::VP8L
183 }
184
185 /// True if the FourCC is `"VP8X"`.
186 pub fn is_extended(&self) -> bool {
187 self.fourcc == fourcc::VP8X
188 }
189}
190
191/// Output of the §2.3–§2.7 RIFF walk.
192///
193/// Holds the §2.4 declared `File Size` plus the ordered list of
194/// chunks discovered inside the RIFF payload. The walker does not
195/// re-order chunks — they appear in the order on disk so that
196/// downstream code can apply §2.7's ordering rules.
197#[derive(Debug, Clone, PartialEq, Eq)]
198pub struct WebpContainer {
199 /// `File Size` field from bytes 4..8 of the §2.4 file header.
200 pub riff_file_size: u32,
201 /// Chunks parsed from the §2.4 file header's payload, in the
202 /// order they appear on disk.
203 pub chunks: Vec<WebpChunk>,
204}
205
206impl WebpContainer {
207 /// Iterate over chunks matching a given FourCC.
208 pub fn chunks_with_fourcc(&self, fourcc: FourCc) -> impl Iterator<Item = &WebpChunk> + '_ {
209 self.chunks.iter().filter(move |c| c.fourcc == fourcc)
210 }
211
212 /// Find the first chunk matching a given FourCC, if any.
213 pub fn first_chunk_with_fourcc(&self, fourcc: FourCc) -> Option<&WebpChunk> {
214 self.chunks.iter().find(|c| c.fourcc == fourcc)
215 }
216
217 /// True if the container starts with `VP8X`, indicating §2.7
218 /// extended layout. (The §2.7 ordering rule places `VP8X` first
219 /// among the structural chunks when present.)
220 pub fn is_extended(&self) -> bool {
221 self.chunks
222 .first()
223 .map(|c| c.is_extended())
224 .unwrap_or(false)
225 }
226}
227
228/// Walk a `RIFF/WEBP` container per RFC 9649 §2.3–§2.7 and return
229/// the list of chunks discovered.
230///
231/// The walker enforces structural invariants only:
232///
233/// * `buf` is at least 12 bytes.
234/// * Bytes `0..4` are `"RIFF"` and bytes `8..12` are `"WEBP"`.
235/// * The §2.4 `File Size` field does not overflow `buf`.
236/// * Each subsequent chunk header is 8 bytes; the declared `Size`
237/// fits inside the remaining RIFF payload; and if `Size` is odd
238/// the required §2.3 pad byte is present.
239///
240/// Per-chunk *content* validation (e.g. the VP8X reserved bits,
241/// VP8 frame width/height, animation counts) is the responsibility
242/// of layers above this walker.
243pub fn parse(buf: &[u8]) -> Result<WebpContainer, ContainerError> {
244 // §2.4 file header — 12 bytes.
245 if buf.len() < 12 {
246 return Err(ContainerError::TooShortForHeader { got: buf.len() });
247 }
248 let riff_tag: FourCc = buf[0..4]
249 .try_into()
250 .expect("12-byte slice always has 4 bytes at offset 0");
251 if riff_tag != fourcc::RIFF {
252 return Err(ContainerError::NotRiff { got: riff_tag });
253 }
254 let riff_file_size = u32::from_le_bytes(
255 buf[4..8]
256 .try_into()
257 .expect("12-byte slice always has 4 bytes at offset 4"),
258 );
259 let webp_tag: FourCc = buf[8..12]
260 .try_into()
261 .expect("12-byte slice always has 4 bytes at offset 8");
262 if webp_tag != fourcc::WEBP {
263 return Err(ContainerError::NotWebp { got: webp_tag });
264 }
265
266 // §2.4: "The file size in the header is the total size of the
267 // chunks that follow plus 4 bytes for the 'WEBP' FourCC."
268 //
269 // So the RIFF payload (the bytes after the 8-byte 'RIFF' +
270 // File Size header) is exactly `riff_file_size` bytes long, of
271 // which the first 4 are the 'WEBP' FourCC and the remainder are
272 // the chunk stream. The walker tolerates trailing data beyond
273 // `riff_file_size` per §2.4 ("Readers MAY parse such files,
274 // ignoring the trailing data") but it never *reads* past that
275 // declared limit when walking chunks.
276 let declared_payload_end = 8usize.saturating_add(riff_file_size as usize);
277 if declared_payload_end > buf.len() {
278 return Err(ContainerError::RiffSizeOverflowsBuffer {
279 declared: riff_file_size,
280 buffer_len: buf.len(),
281 });
282 }
283 let chunk_stream_end = declared_payload_end;
284
285 let mut chunks: Vec<WebpChunk> = Vec::new();
286 let mut cursor: usize = 12; // first byte after the §2.4 header
287 while cursor < chunk_stream_end {
288 // Need 8 bytes for FourCC + Size.
289 if chunk_stream_end - cursor < 8 {
290 return Err(ContainerError::TruncatedChunkHeader { offset: cursor });
291 }
292 let fourcc: FourCc = buf[cursor..cursor + 4]
293 .try_into()
294 .expect("bounds checked above");
295 let size = u32::from_le_bytes(
296 buf[cursor + 4..cursor + 8]
297 .try_into()
298 .expect("bounds checked above"),
299 );
300
301 let payload_start = cursor + 8;
302 let payload_avail = chunk_stream_end - payload_start;
303 if (size as usize) > payload_avail {
304 return Err(ContainerError::ChunkPayloadOverflowsRiff {
305 offset: cursor,
306 declared: size,
307 available: payload_avail,
308 });
309 }
310 let payload_end = payload_start + size as usize;
311
312 chunks.push(WebpChunk {
313 fourcc,
314 size,
315 payload_start,
316 payload_end,
317 });
318
319 // §2.3 padding: if Size is odd, a single 0 byte follows that
320 // is *not* counted in Size. The walker requires that byte to
321 // be present (but does not check its value — §2.3 says it
322 // MUST be 0; that's a writer constraint, not a reader
323 // refusal mode).
324 let needs_pad = (size & 1) == 1;
325 let total = if needs_pad {
326 (size as usize).checked_add(1)
327 } else {
328 Some(size as usize)
329 }
330 .expect("size+1 cannot overflow because size <= payload_avail < usize::MAX");
331 let after_chunk =
332 payload_start
333 .checked_add(total)
334 .ok_or(ContainerError::ChunkPayloadOverflowsRiff {
335 offset: cursor,
336 declared: size,
337 available: payload_avail,
338 })?;
339 if after_chunk > chunk_stream_end {
340 return Err(ContainerError::MissingPadByte { offset: cursor });
341 }
342 cursor = after_chunk;
343 }
344
345 Ok(WebpContainer {
346 riff_file_size,
347 chunks,
348 })
349}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialise one §2.3 chunk: FourCC, little-endian `Size`, the
    /// payload, and a single zero pad byte when the payload length is
    /// odd.
    fn chunk(fourcc: &FourCc, payload: &[u8]) -> Vec<u8> {
        let size_field = (payload.len() as u32).to_le_bytes();
        let pad_len = payload.len() % 2;
        let pad = [0u8];
        [&fourcc[..], &size_field[..], payload, &pad[..pad_len]].concat()
    }

    /// Prefix an already-serialised chunk stream with the §2.4 file
    /// header, using `File Size = 4 + chunks.len()`.
    fn webp(chunks: &[u8]) -> Vec<u8> {
        let size_field = (4u32 + chunks.len() as u32).to_le_bytes();
        [&b"RIFF"[..], &size_field[..], &b"WEBP"[..], chunks].concat()
    }

    #[test]
    fn simple_lossy_walks_to_one_vp8_chunk() {
        // §2.5: file header followed by one 'VP8 ' chunk. The 7-byte
        // payload is odd, so the §2.3 pad byte is exercised too.
        const PAYLOAD: [u8; 7] = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03];
        let body = chunk(&fourcc::VP8, &PAYLOAD);
        let buf = webp(&body);

        let parsed = parse(&buf).expect("simple lossy parses");
        assert_eq!(parsed.riff_file_size, 4 + body.len() as u32);
        assert_eq!(parsed.chunks.len(), 1);

        let only = &parsed.chunks[0];
        assert!(only.is_vp8_lossy());
        assert_eq!(only.size, 7);
        assert_eq!(only.payload(&buf), &PAYLOAD[..]);
        assert!(!parsed.is_extended());
    }

    #[test]
    fn simple_lossless_walks_to_one_vp8l_chunk() {
        // §2.6: file header followed by one 'VP8L' chunk. The 4-byte
        // payload is even, so no §2.3 pad byte is written.
        const PAYLOAD: [u8; 4] = [0x2F, 0x00, 0x00, 0x00];
        let buf = webp(&chunk(&fourcc::VP8L, &PAYLOAD));

        let parsed = parse(&buf).expect("simple lossless parses");
        assert_eq!(parsed.chunks.len(), 1);

        let only = &parsed.chunks[0];
        assert!(only.is_vp8_lossless());
        assert_eq!(only.size, 4);
        assert_eq!(only.payload(&buf), &PAYLOAD[..]);
    }

    #[test]
    fn extended_layout_walks_all_chunks_in_order() {
        // §2.7-style layout: VP8X, ICCP, ANIM, ANMF, VP8 , EXIF, XMP .
        // The walker must report every FourCC in on-disk order.
        let vp8x_payload: [u8; 10] = [
            0x10, 0x00, 0x00, 0x00, // Rsv|I|L|E|X|A|R + 24 bits reserved
            0x07, 0x00, 0x00, // Canvas Width Minus One = 7 (width 8)
            0x07, 0x00, 0x00, // Canvas Height Minus One = 7 (height 8)
        ];
        let iccp_payload = [0xAAu8; 5]; // odd length, needs a pad byte
        let anim_payload = [0u8; 6];
        let anmf_payload = [0u8; 9]; // odd length, needs a pad byte
        let vp8_payload = [0u8; 8];

        let layout: [(FourCc, &[u8]); 7] = [
            (fourcc::VP8X, &vp8x_payload[..]),
            (fourcc::ICCP, &iccp_payload[..]),
            (fourcc::ANIM, &anim_payload[..]),
            (fourcc::ANMF, &anmf_payload[..]),
            (fourcc::VP8, &vp8_payload[..]),
            (fourcc::EXIF, &b"Exif\x00\x00MM*\x00"[..]),
            (fourcc::XMP, &b"<?xpacket?>"[..]),
        ];
        let body: Vec<u8> = layout
            .iter()
            .flat_map(|(tag, payload)| chunk(tag, payload))
            .collect();
        let buf = webp(&body);

        let parsed = parse(&buf).expect("extended layout parses");
        let seen: Vec<FourCc> = parsed.chunks.iter().map(|c| c.fourcc).collect();
        let expected: Vec<FourCc> = layout.iter().map(|(tag, _)| *tag).collect();
        assert_eq!(seen, expected);
        assert!(parsed.is_extended());
        assert_eq!(
            parsed.first_chunk_with_fourcc(fourcc::ICCP).unwrap().size,
            5
        );
        assert_eq!(parsed.chunks_with_fourcc(fourcc::VP8).count(), 1);

        // The ICCP payload must not have absorbed its §2.3 pad byte.
        let iccp = parsed.first_chunk_with_fourcc(fourcc::ICCP).unwrap();
        assert_eq!(iccp.payload(&buf), &iccp_payload[..]);
    }

    #[test]
    fn rejects_buffer_shorter_than_file_header() {
        // One byte short of the 12 bytes that §2.4 requires.
        let truncated: &[u8] = b"RIFF\x00\x00\x00\x00WEB";
        assert_eq!(
            parse(truncated),
            Err(ContainerError::TooShortForHeader { got: 11 })
        );
    }

    #[test]
    fn rejects_wrong_riff_or_form_tag() {
        // Wrong opening tag: lowercase 'riff'.
        let bad_riff: &[u8] = b"riff\x04\x00\x00\x00WEBP";
        assert_eq!(
            parse(bad_riff),
            Err(ContainerError::NotRiff { got: *b"riff" })
        );

        // Correct 'RIFF' tag, but the form type is not 'WEBP' as §2.4
        // demands.
        let bad_form: &[u8] = b"RIFF\x04\x00\x00\x00AVI ";
        assert_eq!(
            parse(bad_form),
            Err(ContainerError::NotWebp { got: *b"AVI " })
        );
    }

    #[test]
    fn rejects_chunk_whose_size_overflows_riff_payload() {
        // A bare 'VP8 ' chunk header claiming Size = 100. The §2.4
        // File Size covers only 'WEBP' (4) plus this header (8), so
        // zero bytes remain for the declared 100-byte payload.
        let header_only = [&fourcc::VP8[..], &100u32.to_le_bytes()[..]].concat();
        let buf = webp(&header_only);
        assert_eq!(
            parse(&buf),
            Err(ContainerError::ChunkPayloadOverflowsRiff {
                offset: 12,
                declared: 100,
                available: 0,
            })
        );
    }

    #[test]
    fn rejects_odd_chunk_missing_pad_byte() {
        // An odd-sized chunk written WITHOUT its §2.3 pad byte, with
        // File Size = 4 ('WEBP') + the unpadded chunk length. The
        // walker must refuse instead of reading beyond the declared
        // payload.
        let unpadded = [
            &fourcc::ICCP[..],
            &3u32.to_le_bytes()[..], // odd size
            &[0xDEu8, 0xAD, 0xBE][..], // 3 payload bytes, no pad
        ]
        .concat();
        let buf = webp(&unpadded);
        assert_eq!(
            parse(&buf),
            Err(ContainerError::MissingPadByte { offset: 12 })
        );
    }

    #[test]
    fn rejects_riff_size_that_runs_past_buffer() {
        // File Size claims 1000 bytes, yet only the 12-byte header is
        // supplied.
        let buf = [&b"RIFF"[..], &1000u32.to_le_bytes()[..], &b"WEBP"[..]].concat();
        assert_eq!(
            parse(&buf),
            Err(ContainerError::RiffSizeOverflowsBuffer {
                declared: 1000,
                buffer_len: 12,
            })
        );
    }
}
551}