Skip to main content

djvu_iff/
lib.rs

1//! IFF (Interchange File Format) container parser for DjVu files.
2//!
3//! This module provides two APIs:
4//!
5//! 1. **New spec-based parser** (`parse_form`) — zero-copy, borrowing slices from
6//!    the input byte buffer. Written from the sndjvu.org specification.
7//!
8//! 2. **Legacy API** (`parse`, `Chunk`, `DjvuFile`) — the original tree-based parser
9//!    kept for internal backward compatibility while the rewrite is in progress.
10//!
11//! ## DjVu IFF layout
12//!
13//! ```text
14//! [4] magic   = "AT&T"
15//! [4] id      = "FORM"
16//! [4] length  (big-endian u32, covers form_type + all chunks)
17//! [4] form_type = "DJVU" | "DJVM" | "BM44" | "PM44"
18//! ... chunks
19//! ```
20//!
21//! Each inner chunk:
22//! ```text
23//! [4] id
24//! [4] length  (big-endian u32)
25//! [n] data    (padded to even number of bytes if length is odd)
26//! ```
27
28#![cfg_attr(not(feature = "std"), no_std)]
29#![deny(unsafe_code)]
30
31#[cfg(not(feature = "std"))]
32extern crate alloc;
33
34#[cfg(not(feature = "std"))]
35use alloc::{string::String, vec::Vec};
36#[cfg(feature = "std")]
37use std::{string::String, vec::Vec};
38
39// ---- Error types ------------------------------------------------------------
40
/// Errors that can occur while parsing the IFF container.
///
/// This is the error type returned by [`parse_form`]; the legacy tree-based
/// parser reports failures through [`LegacyError`] instead.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum IffError {
    /// Input data is too short to contain a valid IFF file.
    ///
    /// [`parse_form`] returns this when fewer than 16 bytes are supplied
    /// (magic, `FORM` id, length and form type — 4 bytes each).
    #[error("input is too short to be a valid IFF file")]
    TooShort,

    /// The `AT&T` magic bytes were not found at the start of the file.
    ///
    /// `got` holds the four bytes that were found instead.
    #[error("bad magic bytes: expected AT&T, got {got:?}")]
    BadMagic { got: [u8; 4] },

    /// The FORM type identifier is not a recognised DjVu type.
    ///
    /// Note: [`parse_form`] does *not* treat an unrecognised form type as a
    /// failure and never returns this variant itself — it accepts any 4-byte
    /// form type and reports it in [`Form::form_type`]. Callers may encounter
    /// unknown form types in bundled documents and should handle them
    /// gracefully. The variant is presumably meant for callers that validate
    /// the form type on their own.
    #[error("unknown FORM type: {id:?}")]
    UnknownFormType { id: [u8; 4] },

    /// A chunk header claims more bytes than are available in the buffer.
    #[error(
        "chunk {:?} claims {} bytes but only {} are available",
        id,
        claimed,
        available
    )]
    ChunkTooLong {
        /// Identifier of the offending chunk.
        id: [u8; 4],
        /// Payload length declared in the chunk header.
        claimed: u32,
        /// Number of bytes actually present after the chunk header.
        available: usize,
    },

    /// The input ended unexpectedly in the middle of a chunk.
    ///
    /// [`parse_form`] also returns this when the chunk following the magic is
    /// not `FORM`, or when the FORM length is too small to hold the 4-byte
    /// form type.
    #[error("unexpected end of input (truncated IFF data)")]
    Truncated,
}
76
/// Original error type used by the legacy implementation.
///
/// Re-exported under the name [`Error`]. It implements `Display`
/// unconditionally and `std::error::Error` when the `std` feature is enabled.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LegacyError {
    /// Input data is shorter than expected.
    UnexpectedEof,
    /// A required magic number or tag was not found.
    InvalidMagic,
    /// A chunk or field has an invalid length.
    InvalidLength,
    /// A required chunk is missing.
    ///
    /// The payload is shown after "missing required chunk: " by `Display`.
    MissingChunk(&'static str),
    /// An unsupported feature or version was encountered.
    ///
    /// The payload is a short description of what is unsupported.
    Unsupported(&'static str),
    /// Generic format violation.
    ///
    /// The payload is a free-form description of the violation.
    FormatError(String),
}
93
94impl core::fmt::Display for LegacyError {
95    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
96        match self {
97            LegacyError::UnexpectedEof => write!(f, "unexpected end of input"),
98            LegacyError::InvalidMagic => write!(f, "invalid magic number"),
99            LegacyError::InvalidLength => write!(f, "invalid length"),
100            LegacyError::MissingChunk(id) => write!(f, "missing required chunk: {}", id),
101            LegacyError::Unsupported(msg) => write!(f, "unsupported: {}", msg),
102            LegacyError::FormatError(msg) => write!(f, "format error: {}", msg),
103        }
104    }
105}
106
// Gated on the `std` feature because the impl names `std::error::Error`,
// which is not reachable in a `no_std` build of this crate.
#[cfg(feature = "std")]
impl std::error::Error for LegacyError {}
109
110/// Alias for [`LegacyError`].
111pub use LegacyError as Error;
112
113// ---- IFF chunk types --------------------------------------------------------
114
/// A 4-byte chunk identifier (e.g., b"FORM", b"INFO", b"Sjbz").
///
/// The legacy parser stores the bytes exactly as read from the input; it does
/// not check that they are ASCII.
pub type ChunkId = [u8; 4];
117
/// A parsed IFF chunk — either a FORM container or a leaf data chunk.
///
/// This is the owned tree representation produced by the legacy [`parse`]
/// function: leaf payloads are copied into `Vec<u8>` and FORM containers hold
/// their children recursively.
#[derive(Debug, Clone)]
pub enum Chunk {
    /// A FORM container with a secondary ID and child chunks.
    Form {
        /// The secondary ID (e.g., b"DJVU", b"DJVM", b"DJVI", b"THUM").
        secondary_id: ChunkId,
        /// Total byte length of the FORM payload (from the IFF length field).
        /// Includes the 4-byte secondary ID and all child chunk bytes.
        ///
        /// This is the value declared in the input, not one recomputed from
        /// `children`. The legacy emitter uses only its parity, to decide
        /// whether the last child is followed by a pad byte inside the FORM.
        length: u32,
        /// Child chunks within this FORM.
        children: Vec<Chunk>,
    },
    /// A leaf chunk with raw data.
    Leaf {
        /// The chunk ID (e.g., b"INFO", b"Sjbz", b"BG44").
        id: ChunkId,
        /// The raw chunk payload bytes.
        ///
        /// The alignment pad byte that follows an odd-length payload on disk
        /// is not included.
        data: Vec<u8>,
    },
}
139
140impl Chunk {
141    /// For leaf chunks, return the data slice. For FORM chunks, returns empty slice.
142    pub fn data(&self) -> &[u8] {
143        match self {
144            Chunk::Form { .. } => &[],
145            Chunk::Leaf { data, .. } => data,
146        }
147    }
148
149    /// For FORM chunks, return children. For leaf chunks, returns empty slice.
150    pub fn children(&self) -> &[Chunk] {
151        match self {
152            Chunk::Form { children, .. } => children,
153            Chunk::Leaf { .. } => &[],
154        }
155    }
156
157    /// Return the declared payload length from the IFF length field.
158    ///
159    /// For `Form` chunks, this is the value read from the IFF header — it
160    /// covers the secondary ID (4 bytes) and all children.  For `Leaf`
161    /// chunks, this equals `data().len()`.
162    pub fn payload_length(&self) -> u32 {
163        match self {
164            Chunk::Form { length, .. } => *length,
165            Chunk::Leaf { data, .. } => data.len() as u32,
166        }
167    }
168
169    /// Find the first leaf chunk with the given ID in direct children.
170    pub fn find_first(&self, target_id: &[u8; 4]) -> Option<&Chunk> {
171        self.children().iter().find(|c| match c {
172            Chunk::Leaf { id, .. } => id == target_id,
173            _ => false,
174        })
175    }
176
177    /// Find all leaf chunks with the given ID in direct children.
178    pub fn find_all(&self, target_id: &[u8; 4]) -> Vec<&Chunk> {
179        self.children()
180            .iter()
181            .filter(|c| match c {
182                Chunk::Leaf { id, .. } => id == target_id,
183                _ => false,
184            })
185            .collect()
186    }
187}
188
/// A parsed DjVu document (the root FORM chunk).
///
/// Produced by [`parse`] and consumed by [`emit`].
#[derive(Debug, Clone)]
pub struct DjvuFile {
    /// The top-level chunk of the file.
    ///
    /// Normally a [`Chunk::Form`], but [`parse`] does not enforce this: it
    /// stores whatever chunk it finds first.
    pub root: Chunk,
}
194
195/// Parse a DjVu file from raw bytes (legacy tree-based parser).
196///
197/// Expects the file to begin with "AT&T" magic followed by a root FORM chunk.
198pub fn parse(data: &[u8]) -> Result<DjvuFile, Error> {
199    if data.len() < 4 {
200        return Err(Error::UnexpectedEof);
201    }
202    // Check for "AT&T" magic
203    let (magic, rest) = if &data[..4] == b"AT&T" {
204        (&data[..4], &data[4..])
205    } else {
206        // Some files may not have AT&T prefix (bare FORM)
207        (&data[..0], data)
208    };
209    let _ = magic;
210
211    let (root, _) = parse_chunk(rest, 0)?;
212    Ok(DjvuFile { root })
213}
214
215/// Parse a single chunk starting at `offset` within `data`.
216/// Returns the parsed chunk and the number of bytes consumed (including padding).
217fn parse_chunk(data: &[u8], offset: usize) -> Result<(Chunk, usize), Error> {
218    if offset + 8 > data.len() {
219        return Err(Error::UnexpectedEof);
220    }
221
222    let id: ChunkId = [
223        data[offset],
224        data[offset + 1],
225        data[offset + 2],
226        data[offset + 3],
227    ];
228    let length = u32::from_be_bytes([
229        data[offset + 4],
230        data[offset + 5],
231        data[offset + 6],
232        data[offset + 7],
233    ]);
234
235    let payload_start = offset + 8;
236    let payload_end = payload_start + length as usize;
237
238    if payload_end > data.len() {
239        return Err(Error::UnexpectedEof);
240    }
241
242    // Word-align: next chunk starts at even offset
243    let total = 8 + length as usize;
244    let padded_total = total + (total % 2);
245
246    if &id == b"FORM" {
247        if length < 4 {
248            return Err(Error::InvalidLength);
249        }
250        let secondary_id: ChunkId = [
251            data[payload_start],
252            data[payload_start + 1],
253            data[payload_start + 2],
254            data[payload_start + 3],
255        ];
256
257        let children_start = payload_start + 4;
258        let children = parse_children(data, children_start, payload_end)?;
259
260        Ok((
261            Chunk::Form {
262                secondary_id,
263                length,
264                children,
265            },
266            padded_total,
267        ))
268    } else {
269        let chunk_data = data[payload_start..payload_end].to_vec();
270        Ok((
271            Chunk::Leaf {
272                id,
273                data: chunk_data,
274            },
275            padded_total,
276        ))
277    }
278}
279
280/// Parse sequential chunks within a range of bytes.
281fn parse_children(data: &[u8], start: usize, end: usize) -> Result<Vec<Chunk>, Error> {
282    let mut chunks = Vec::new();
283    let mut pos = start;
284
285    while pos < end {
286        if pos + 8 > end {
287            // Trailing bytes — some files have junk at end; tolerate it
288            break;
289        }
290        let (chunk, consumed) = parse_chunk(data, pos)?;
291        chunks.push(chunk);
292        pos += consumed;
293    }
294
295    Ok(chunks)
296}
297
298// ---- Legacy emitter (round-trip support, #195) ------------------------------
299
300/// Serialise a `DjvuFile` (legacy parser) back into the on-disk IFF byte
301/// stream, including the leading "AT&T" magic.
302///
303/// Parser/emitter contract: `parse(emit(file)) == file` for any tree
304/// previously produced by `parse(...)`. This is used by property-based
305/// round-trip tests under `tests/proptest_codecs.rs` (#195) and is small
306/// enough to keep alongside the parser; not intended as a general-purpose
307/// DjVu writer.
308pub fn emit(file: &DjvuFile) -> Vec<u8> {
309    let mut out = Vec::with_capacity(64);
310    out.extend_from_slice(b"AT&T");
311    emit_chunk(&file.root, &mut out);
312    out
313}
314
315fn emit_chunk(chunk: &Chunk, out: &mut Vec<u8>) {
316    emit_chunk_inner(chunk, out, false);
317}
318
319fn emit_chunk_inner(chunk: &Chunk, out: &mut Vec<u8>, suppress_inner_pad: bool) {
320    match chunk {
321        Chunk::Form {
322            secondary_id,
323            length: stored_length,
324            children,
325        } => {
326            // Two valid IFF layouts exist for a FORM whose last child has odd
327            // payload length:
328            //   (A) FORM declared length is odd, no pad after last child;
329            //       the outer/parent loop writes the alignment byte.
330            //   (B) FORM declared length is even, includes a pad byte after
331            //       the last child inside the FORM body.
332            // Real DjVu files mix both styles. Preserve the parser's stored
333            // length parity so unmutated subtrees round-trip byte-identical.
334            let suppress_last_pad = (*stored_length & 1) == 1;
335            let mut payload: Vec<u8> = Vec::new();
336            payload.extend_from_slice(secondary_id);
337            let n = children.len();
338            for (i, child) in children.iter().enumerate() {
339                let last = i + 1 == n;
340                emit_chunk_inner(child, &mut payload, last && suppress_last_pad);
341            }
342            let len = payload.len() as u32;
343            out.extend_from_slice(b"FORM");
344            out.extend_from_slice(&len.to_be_bytes());
345            out.extend_from_slice(&payload);
346            // Outer pad to align the next sibling in our parent. Skip when
347            // our parent told us they'll provide alignment for us.
348            let total = 8 + payload.len();
349            if !suppress_inner_pad && total % 2 == 1 {
350                out.push(0);
351            }
352        }
353        Chunk::Leaf { id, data } => {
354            let len = data.len() as u32;
355            out.extend_from_slice(id);
356            out.extend_from_slice(&len.to_be_bytes());
357            out.extend_from_slice(data);
358            let total = 8 + data.len();
359            if !suppress_inner_pad && total % 2 == 1 {
360                out.push(0);
361            }
362        }
363    }
364}
365
366// ---- New spec-based IFF parser (phase 1) ------------------------------------
367//
368// `parse_form` is a new zero-copy parser written from the sndjvu.org spec.
369// It returns `Form` and `IffChunk` types (distinct from the legacy `Chunk`).
370
/// A parsed IFF chunk from the new spec-based parser: a 4-byte identifier
/// plus a zero-copy slice into the original byte buffer.
///
/// The spec-based parser is flat: a nested `FORM` is returned as an ordinary
/// chunk whose `id` is `FORM` and whose `data` starts with the nested form
/// type. It is not parsed recursively.
#[derive(Debug, Clone, Copy)]
pub struct IffChunk<'a> {
    /// The 4-byte ASCII chunk identifier.
    pub id: [u8; 4],
    /// The raw data bytes of this chunk (not including id or length header).
    ///
    /// Exactly as many bytes as the header declares; the alignment pad byte
    /// after an odd-length payload is excluded.
    pub data: &'a [u8],
}
380
/// The top-level FORM structure parsed by the spec-based parser.
///
/// Returned by [`parse_form`]; the chunks borrow from the byte slice that was
/// passed to it.
#[derive(Debug)]
pub struct Form<'a> {
    /// The 4-byte FORM type (e.g. `DJVU`, `DJVM`, `BM44`, `PM44`).
    ///
    /// Stored as read from the input; [`parse_form`] does not validate it.
    pub form_type: [u8; 4],
    /// All chunks contained within the FORM, in order.
    ///
    /// Only the direct contents of the top-level FORM are listed.
    pub chunks: Vec<IffChunk<'a>>,
}
389
390/// Parse a DjVu IFF byte stream into a [`Form`].
391///
392/// This is the new spec-based zero-copy parser. It returns borrowed data
393/// from the input slice.
394///
395/// # Errors
396///
397/// Returns [`IffError`] if:
398/// - The data does not begin with the `AT&T` magic bytes
399/// - The FORM chunk header is missing or malformed
400/// - Any chunk extends beyond the available data
401pub fn parse_form(data: &[u8]) -> Result<Form<'_>, IffError> {
402    // Need at least: magic(4) + FORM id(4) + length(4) + form_type(4) = 16 bytes
403    if data.len() < 16 {
404        return Err(IffError::TooShort);
405    }
406
407    // Verify AT&T magic prefix
408    let magic = read_4(data, 0)?;
409    if &magic != b"AT&T" {
410        return Err(IffError::BadMagic { got: magic });
411    }
412
413    // Read FORM chunk id
414    let form_id = read_4(data, 4)?;
415    if &form_id != b"FORM" {
416        return Err(IffError::Truncated);
417    }
418
419    // Read FORM length (big-endian u32)
420    let form_len = read_u32_be(data, 8)? as usize;
421
422    // FORM data starts at byte 12 and must fit within the buffer
423    let form_data_end = 12_usize.checked_add(form_len).ok_or(IffError::Truncated)?;
424    if form_data_end > data.len() {
425        return Err(IffError::ChunkTooLong {
426            id: *b"FORM",
427            claimed: form_len as u32,
428            available: data.len().saturating_sub(12),
429        });
430    }
431
432    // Read form_type (first 4 bytes of FORM data)
433    if form_len < 4 {
434        return Err(IffError::Truncated);
435    }
436    let form_type = read_4(data, 12)?;
437
438    // Parse chunks from the FORM body (after form_type)
439    let body = data.get(16..form_data_end).ok_or(IffError::Truncated)?;
440
441    let chunks = parse_iff_chunks(body)?;
442
443    Ok(Form { form_type, chunks })
444}
445
446/// Parse a sequence of IFF chunks from a byte slice (new spec-based parser).
447///
448/// Each chunk is: `[4-byte id][4-byte big-endian length][length bytes data]`,
449/// with data padded to an even byte boundary.
450fn parse_iff_chunks(mut buf: &[u8]) -> Result<Vec<IffChunk<'_>>, IffError> {
451    let mut chunks = Vec::new();
452
453    while buf.len() >= 8 {
454        let id = read_4(buf, 0)?;
455        let data_len = read_u32_be(buf, 4)? as usize;
456
457        let data_start = 8_usize;
458        let data_end = data_start
459            .checked_add(data_len)
460            .ok_or(IffError::Truncated)?;
461
462        if data_end > buf.len() {
463            return Err(IffError::ChunkTooLong {
464                id,
465                claimed: data_len as u32,
466                available: buf.len().saturating_sub(data_start),
467            });
468        }
469
470        let chunk_data = buf.get(data_start..data_end).ok_or(IffError::Truncated)?;
471        chunks.push(IffChunk {
472            id,
473            data: chunk_data,
474        });
475
476        // Advance past this chunk; pad to even boundary
477        let padded_len = data_len + (data_len & 1);
478        let next = data_start
479            .checked_add(padded_len)
480            .ok_or(IffError::Truncated)?;
481
482        // Clamp to buf length to handle trailing padding gracefully
483        buf = buf.get(next.min(buf.len())..).ok_or(IffError::Truncated)?;
484    }
485
486    Ok(chunks)
487}
488
489/// Read 4 bytes from `data` at `offset` as a `[u8; 4]`.
490#[inline]
491fn read_4(data: &[u8], offset: usize) -> Result<[u8; 4], IffError> {
492    data.get(offset..offset + 4)
493        .and_then(|s| s.try_into().ok())
494        .ok_or(IffError::Truncated)
495}
496
497/// Read a big-endian `u32` from `data` at `offset`.
498#[inline]
499fn read_u32_be(data: &[u8], offset: usize) -> Result<u32, IffError> {
500    let b = read_4(data, offset)?;
501    Ok(u32::from_be_bytes(b))
502}
503
504// ---- Legacy dump helper (tests only) ----------------------------------------
505
/// Produce a structural dump of the chunk tree.
///
/// One line per chunk: its ID (FORMs as `FORM:<secondary id>`) and its
/// length in square brackets, indented two spaces per level with the root at
/// one level of indentation.
#[cfg(test)]
pub fn dump(file: &DjvuFile) -> String {
    let mut text = String::new();
    dump_chunk(&file.root, 1, &mut text);
    text
}

/// Append the dump line for `chunk` to `out`, followed by the lines of its
/// children one level deeper. `depth` is the indentation level of `chunk`.
#[cfg(test)]
fn dump_chunk(chunk: &Chunk, depth: usize, out: &mut String) {
    /// Render a chunk ID as text, substituting `????` for non-UTF-8 bytes.
    fn label(raw: &[u8; 4]) -> &str {
        std::str::from_utf8(raw).unwrap_or("????")
    }

    let pad = "  ".repeat(depth);
    match chunk {
        Chunk::Leaf { id, data } => {
            let line = format!("{}{} [{}] \n", pad, label(id), data.len());
            out.push_str(&line);
        }
        Chunk::Form {
            secondary_id,
            length,
            children,
        } => {
            let line = format!("{}FORM:{} [{}] \n", pad, label(secondary_id), length);
            out.push_str(&line);
            for nested in children {
                dump_chunk(nested, depth + 1, out);
            }
        }
    }
}
535
536#[cfg(test)]
537mod tests {
538    use super::*;
539
540    fn assets_path() -> std::path::PathBuf {
541        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
542            .join("../../references/djvujs/library/assets")
543    }
544
545    fn golden_path() -> std::path::PathBuf {
546        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/golden/iff")
547    }
548
549    // ---- Legacy parser tests ------------------------------------------------
550
551    /// Parse our structural dump and djvudump output to comparable lines.
552    fn normalize_dump(input: &str) -> Vec<String> {
553        input
554            .lines()
555            .filter(|l| !l.trim().is_empty())
556            .map(|line| {
557                let trimmed = line.trim_end();
558                if let Some(bracket_end) = trimmed.find(']') {
559                    let structural = &trimmed[..=bracket_end];
560                    structural.trim_end().to_string()
561                } else {
562                    trimmed.to_string()
563                }
564            })
565            .collect()
566    }
567
568    fn assert_structure_matches(djvu_file: &str, golden_file: &str) {
569        let data = std::fs::read(assets_path().join(djvu_file)).unwrap();
570        let file = parse(&data).unwrap();
571        let actual = dump(&file);
572        let expected = std::fs::read_to_string(golden_path().join(golden_file)).unwrap();
573
574        let actual_lines = normalize_dump(&actual);
575        let expected_lines = normalize_dump(&expected);
576
577        assert_eq!(
578            actual_lines.len(),
579            expected_lines.len(),
580            "Line count mismatch for {} ({} vs {})",
581            djvu_file,
582            actual_lines.len(),
583            expected_lines.len()
584        );
585
586        for (i, (a, e)) in actual_lines.iter().zip(expected_lines.iter()).enumerate() {
587            assert_eq!(
588                a,
589                e,
590                "Line {} mismatch for {}\n  actual:   {:?}\n  expected: {:?}",
591                i + 1,
592                djvu_file,
593                a,
594                e
595            );
596        }
597    }
598
599    #[test]
600    fn parse_boy_jb2_legacy() {
601        let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
602        let file = parse(&data).unwrap();
603
604        match &file.root {
605            Chunk::Form {
606                secondary_id,
607                children,
608                ..
609            } => {
610                assert_eq!(secondary_id, b"DJVU");
611                assert_eq!(children.len(), 2);
612            }
613            _ => panic!("expected FORM root"),
614        }
615    }
616
    // Golden-file structure tests: each one parses a sample document with the
    // legacy parser and compares its chunk-tree dump against the matching
    // `.dump` file in the golden directory.

    /// Structure of `boy_jb2.djvu` matches its golden dump.
    #[test]
    fn structure_boy_jb2() {
        assert_structure_matches("boy_jb2.djvu", "boy_jb2.dump");
    }

    /// Structure of `boy.djvu` matches its golden dump.
    #[test]
    fn structure_boy() {
        assert_structure_matches("boy.djvu", "boy.dump");
    }

    /// Structure of `chicken.djvu` matches its golden dump.
    #[test]
    fn structure_chicken() {
        assert_structure_matches("chicken.djvu", "chicken.dump");
    }

    /// Structure of `carte.djvu` matches its golden dump.
    #[test]
    fn structure_carte() {
        assert_structure_matches("carte.djvu", "carte.dump");
    }

    /// Structure of `navm_fgbz.djvu` matches its golden dump.
    #[test]
    fn structure_navm_fgbz() {
        assert_structure_matches("navm_fgbz.djvu", "navm_fgbz.dump");
    }

    /// Structure of `colorbook.djvu` matches its golden dump.
    #[test]
    fn structure_colorbook() {
        assert_structure_matches("colorbook.djvu", "colorbook.dump");
    }

    /// Structure of `DjVu3Spec_bundled.djvu` matches its golden dump.
    #[test]
    fn structure_djvu3spec_bundled() {
        assert_structure_matches("DjVu3Spec_bundled.djvu", "djvu3spec_bundled.dump");
    }

    /// Structure of `big-scanned-page.djvu` matches its golden dump.
    #[test]
    fn structure_big_scanned_page() {
        assert_structure_matches("big-scanned-page.djvu", "big_scanned_page.dump");
    }
656
657    // ---- New spec-based parser tests ----------------------------------------
658
659    /// Build a minimal valid single-page DjVu file in memory for testing.
660    fn minimal_djvu_bytes() -> Vec<u8> {
661        let info_data: &[u8] = &[
662            0x00, 0xB5, // width = 181
663            0x00, 0xF0, // height = 240
664            0x18, // minor version
665            0x00, // major version
666            0x64, 0x00, // dpi = 100 (little-endian)
667            0x16, // gamma byte = 22 → 2.2
668            0x00, // flags: no rotation
669        ];
670        let info_len = info_data.len() as u32;
671
672        let mut chunk = Vec::new();
673        chunk.extend_from_slice(b"INFO");
674        chunk.extend_from_slice(&info_len.to_be_bytes());
675        chunk.extend_from_slice(info_data);
676
677        let mut form_body = Vec::new();
678        form_body.extend_from_slice(b"DJVU");
679        form_body.extend_from_slice(&chunk);
680
681        let form_len = form_body.len() as u32;
682
683        let mut file = Vec::new();
684        file.extend_from_slice(b"AT&T");
685        file.extend_from_slice(b"FORM");
686        file.extend_from_slice(&form_len.to_be_bytes());
687        file.extend_from_slice(&form_body);
688
689        file
690    }
691
692    #[test]
693    fn empty_input_is_error() {
694        let result = parse_form(&[]);
695        assert!(result.is_err());
696        assert_eq!(result.unwrap_err(), IffError::TooShort);
697    }
698
699    #[test]
700    fn short_input_is_error() {
701        let result = parse_form(&[0u8; 10]);
702        assert!(result.is_err());
703        assert_eq!(result.unwrap_err(), IffError::TooShort);
704    }
705
706    #[test]
707    fn bad_magic_is_error() {
708        let mut data = minimal_djvu_bytes();
709        data[0] = 0xFF;
710        data[1] = 0xFF;
711        data[2] = 0xFF;
712        data[3] = 0xFF;
713
714        let result = parse_form(&data);
715        assert!(result.is_err());
716        assert_eq!(
717            result.unwrap_err(),
718            IffError::BadMagic {
719                got: [0xFF, 0xFF, 0xFF, 0xFF]
720            }
721        );
722    }
723
724    #[test]
725    fn valid_single_page_parses() {
726        let data = minimal_djvu_bytes();
727        let form = parse_form(&data).expect("should parse successfully");
728
729        assert_eq!(&form.form_type, b"DJVU");
730        assert_eq!(form.chunks.len(), 1);
731        assert_eq!(&form.chunks[0].id, b"INFO");
732        assert_eq!(form.chunks[0].data.len(), 10);
733    }
734
735    #[test]
736    fn truncated_chunk_is_error() {
737        let mut data = minimal_djvu_bytes();
738        let new_len = data.len() - 4;
739        data.truncate(new_len);
740
741        let result = parse_form(&data);
742        assert!(result.is_err());
743        match result.unwrap_err() {
744            IffError::ChunkTooLong { .. } | IffError::Truncated => {}
745            other => panic!("expected ChunkTooLong or Truncated, got {:?}", other),
746        }
747    }
748
749    #[test]
750    fn unknown_form_type_allowed() {
751        let mut data = minimal_djvu_bytes();
752        data[12] = b'X';
753        data[13] = b'X';
754        data[14] = b'X';
755        data[15] = b'X';
756
757        let form = parse_form(&data).expect("unknown form type should still parse");
758        assert_eq!(&form.form_type, b"XXXX");
759    }
760
761    #[test]
762    fn real_chicken_djvu_parses() {
763        let path = assets_path().join("chicken.djvu");
764        let data = std::fs::read(&path).expect("chicken.djvu must exist");
765        let form = parse_form(&data).expect("chicken.djvu should parse");
766
767        assert_eq!(&form.form_type, b"DJVU");
768        assert!(!form.chunks.is_empty(), "must have at least one chunk");
769        assert_eq!(&form.chunks[0].id, b"INFO");
770        assert!(form.chunks[0].data.len() >= 10);
771    }
772
773    #[test]
774    fn real_multipage_djvu_parses() {
775        let path = assets_path().join("navm_fgbz.djvu");
776        let data = std::fs::read(&path).expect("navm_fgbz.djvu must exist");
777        let form = parse_form(&data).expect("navm_fgbz.djvu should parse");
778
779        assert_eq!(&form.form_type, b"DJVM");
780        assert!(!form.chunks.is_empty());
781    }
782
783    #[test]
784    fn odd_length_chunk_padding() {
785        let chunk1_data: &[u8] = &[0xAA, 0xBB, 0xCC, 0xDD, 0xEE]; // 5 bytes → padded to 6
786        let chunk2_data: &[u8] = &[0x01, 0x02]; // 2 bytes
787
788        let mut form_body: Vec<u8> = Vec::new();
789        form_body.extend_from_slice(b"DJVU");
790
791        form_body.extend_from_slice(b"TST1");
792        form_body.extend_from_slice(&5u32.to_be_bytes());
793        form_body.extend_from_slice(chunk1_data);
794        form_body.push(0x00); // padding byte
795
796        form_body.extend_from_slice(b"TST2");
797        form_body.extend_from_slice(&2u32.to_be_bytes());
798        form_body.extend_from_slice(chunk2_data);
799
800        let form_len = form_body.len() as u32;
801
802        let mut file: Vec<u8> = Vec::new();
803        file.extend_from_slice(b"AT&T");
804        file.extend_from_slice(b"FORM");
805        file.extend_from_slice(&form_len.to_be_bytes());
806        file.extend_from_slice(&form_body);
807
808        let form = parse_form(&file).expect("should parse padded chunk");
809        assert_eq!(form.chunks.len(), 2);
810        assert_eq!(&form.chunks[0].id, b"TST1");
811        assert_eq!(form.chunks[0].data, chunk1_data);
812        assert_eq!(&form.chunks[1].id, b"TST2");
813        assert_eq!(form.chunks[1].data, chunk2_data);
814    }
815}