Skip to main content

djvu_rs/
djvu_mut.rs

1//! In-place DjVu document mutation — byte-preserving rewrite of the IFF tree.
2//!
3//! PR1 of [#222](https://github.com/matyushkin/djvu-rs/issues/222). This is the
4//! foundation layer: parse a document into an editable tree, walk to a leaf
5//! chunk by path, replace its data, and serialise back. When no mutations have
6//! happened, [`DjVuDocumentMut::into_bytes`] returns the original bytes
7//! verbatim (byte-identical round-trip).
8//!
9//! Future PRs in the [#222](https://github.com/matyushkin/djvu-rs/issues/222)
10//! sequence add high-level setters (`set_metadata`, `set_bookmarks`,
11//! `page_mut(i).set_text_layer`, `…set_annotations`) plus indirect-DJVM
12//! support, which all build on the chunk-replacement primitive defined here.
13//!
14//! ## Example
15//!
16//! ```no_run
17//! use djvu_rs::djvu_mut::DjVuDocumentMut;
18//!
19//! let original = std::fs::read("doc.djvu").unwrap();
20//! let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
21//!
22//! // Round-trip byte-identical without edits:
23//! assert_eq!(doc.clone().into_bytes(), original);
24//!
25//! // Replace a leaf chunk's payload by path:
26//! doc.replace_leaf(&[0], b"new payload".to_vec()).unwrap();
27//! let edited = doc.into_bytes();
28//! ```
29//!
30//! ## Path format
31//!
32//! A `path: &[usize]` is a sequence of child indices to walk from the root
33//! `FORM` chunk. The root itself is never indexed — `[0]` selects the first
34//! child of the root.
35//!
//! For a single-page `FORM:DJVU`: `[i]` selects the i-th child chunk
//! (e.g. `INFO`, `Sjbz`, `BG44`). For a bundled `FORM:DJVM`:
//! `[0]` selects the `DIRM` chunk, `[1]` selects the `NAVM` chunk (if
//! present), and every later index `[i]` selects the component `FORM` that
//! is the i-th child of the root. Indices count `DIRM` and `NAVM` too, so
//! the first component is `[1]` without `NAVM` and `[2]` with it. To reach
//! a leaf inside that component: `[i, j]`.
41
42#[cfg(not(feature = "std"))]
43use alloc::vec::Vec;
44
45use crate::annotation::{Annotation, MapArea, encode_annotations_bzz};
46use crate::djvu_document::DjVuBookmark;
47use crate::error::{IffError, LegacyError};
48use crate::iff::{self, Chunk, DjvuFile};
49use crate::info::PageInfo;
50use crate::metadata::{DjVuMetadata, encode_metadata_bzz};
51use crate::navm_encode::encode_navm;
52use crate::text::TextLayer;
53use crate::text_encode::encode_text_layer;
54
/// Errors produced by [`DjVuDocumentMut`] operations.
#[derive(Debug, thiserror::Error)]
pub enum MutError {
    /// IFF parse error during [`DjVuDocumentMut::from_bytes`].
    #[error("IFF parse error: {0}")]
    Parse(#[from] LegacyError),

    /// The path indexed past the end of a FORM's children.
    #[error("chunk path out of range: index {index} at depth {depth} (form has {len} children)")]
    PathOutOfRange {
        /// The offending child index taken from the path.
        index: usize,
        /// Position of that index within the path (0 = first element).
        depth: usize,
        /// Number of children the FORM at that depth actually has.
        len: usize,
    },

    /// The path traversed into a leaf chunk and tried to keep going.
    ///
    /// `depth` is the position in the path at which the walk was already
    /// standing on a leaf; `len` is the total length of the path.
    #[error("chunk path enters a leaf at depth {depth} but is {len} levels long")]
    PathTraversesLeaf { depth: usize, len: usize },

    /// `replace_leaf` was called with a path that ends on a `FORM` chunk
    /// rather than a leaf.
    #[error("path ends on a FORM, not a leaf chunk")]
    NotALeaf,

    /// The path is empty — must contain at least one index.
    #[error("path must not be empty")]
    EmptyPath,

    /// `page_mut` was called with an index past the document's page count.
    #[error("page index {index} out of range (document has {count} pages)")]
    PageOutOfRange {
        /// Requested page index.
        index: usize,
        /// Number of pages in the document.
        count: usize,
    },

    /// The page has no INFO chunk, which is required to encode chunks whose
    /// payload depends on page height (currently `set_text_layer`).
    #[error("page has no INFO chunk; cannot encode height-dependent chunk")]
    MissingPageInfo,

    /// The page's INFO chunk failed to parse.
    #[error("INFO chunk parse error: {0}")]
    InfoParse(#[from] IffError),

    /// The operation requires DIRM offset recomputation, which is not
    /// implemented for indirect (non-bundled) `FORM:DJVM` documents — those
    /// reference page bytes in external files via a resolver, so editing them
    /// in place would also need the external files rewritten. Tracked as a
    /// follow-up PR (PR5) in the
    /// [#222](https://github.com/matyushkin/djvu-rs/issues/222) sequence.
    #[error("mutation of indirect DJVM documents is not supported")]
    IndirectDjvmUnsupported,

    /// The DIRM chunk was malformed in a way that prevents offset
    /// recomputation. Should not occur after a successful
    /// [`DjVuDocumentMut::from_bytes`] on a well-formed DJVM document.
    ///
    /// The payload is a short static description of what was wrong.
    #[error("DIRM chunk is malformed: {0}")]
    DirmMalformed(&'static str),

    /// The number of `FORM:DJVU`/`FORM:DJVI`/`FORM:THUM` components in the
    /// bundle does not match the count recorded in DIRM. Indicates a
    /// structurally inconsistent document.
    #[error("DIRM component count {dirm} does not match bundle child count {children}")]
    DirmComponentCountMismatch {
        /// Component count read from DIRM (`nfiles`).
        dirm: usize,
        /// Actual count of `FORM:DJVU`/`FORM:DJVI`/`FORM:THUM` children in
        /// the root.
        children: usize,
    },

    /// `set_bookmarks` was called on a `FORM:DJVU` (single-page) document.
    /// NAVM bookmarks live in `FORM:DJVM` bundles only.
    #[error("set_bookmarks requires a FORM:DJVM bundle (this document is FORM:DJVU)")]
    BookmarksRequireDjvm,
}
132
/// A DjVu document opened for in-place mutation.
///
/// Holds a parsed [`DjvuFile`] tree plus the original byte buffer, so that
/// [`Self::into_bytes`] returns a byte-identical copy when no edits have been
/// made. After any mutation the dirty flag is set and serialisation falls
/// through to [`iff::emit`], which reconstructs the IFF stream from the tree
/// (see the parser/emitter contract in `src/iff.rs`).
#[derive(Debug, Clone)]
pub struct DjVuDocumentMut {
    /// Parsed IFF tree; every edit is applied to this tree.
    file: DjvuFile,
    /// Original bytes of the document.  Held so an unedited round-trip is
    /// byte-identical without re-emitting through `iff::emit` (which
    /// recomputes FORM lengths and would not necessarily match the original
    /// byte layout for documents with inconsistent headers).
    original_bytes: Vec<u8>,
    /// `false` until an edit is applied. Serialisation returns
    /// `original_bytes` while it is `false` and re-emits `file` otherwise.
    dirty: bool,
}
150
151impl DjVuDocumentMut {
152    /// Parse a DjVu document for mutation. Validates the IFF tree.
153    ///
154    /// The original bytes are retained so that a no-edit round-trip via
155    /// [`Self::into_bytes`] is byte-identical to the input.
156    pub fn from_bytes(data: &[u8]) -> Result<Self, MutError> {
157        let file = iff::parse(data)?;
158        Ok(Self {
159            file,
160            original_bytes: data.to_vec(),
161            dirty: false,
162        })
163    }
164
165    /// Number of direct children of the root FORM chunk.
166    ///
167    /// For a single-page `FORM:DJVU` this is the number of leaf chunks
168    /// (`INFO`, `Sjbz`, …). For a bundled `FORM:DJVM` it is `DIRM` + optional
169    /// `NAVM` + per-page component `FORM`s.
170    pub fn root_child_count(&self) -> usize {
171        self.file.root.children().len()
172    }
173
174    /// Return the 4-byte FORM type of the root (e.g. `b"DJVU"`, `b"DJVM"`).
175    /// Returns `None` if the root is somehow a leaf — should never happen on
176    /// a well-formed input that survived `from_bytes`.
177    pub fn root_form_type(&self) -> Option<&[u8; 4]> {
178        match &self.file.root {
179            Chunk::Form { secondary_id, .. } => Some(secondary_id),
180            Chunk::Leaf { .. } => None,
181        }
182    }
183
184    /// Replace the data of the leaf chunk reached by `path`.
185    ///
186    /// `path` is a sequence of child indices walked from the root FORM's
187    /// children. The walk descends into any FORM it encounters at an
188    /// intermediate index; the final index must address a leaf.
189    ///
190    /// # Errors
191    ///
192    /// - [`MutError::EmptyPath`] if `path.is_empty()`.
193    /// - [`MutError::PathOutOfRange`] if any index exceeds a FORM's child count.
194    /// - [`MutError::PathTraversesLeaf`] if the path tries to descend past a leaf.
195    /// - [`MutError::NotALeaf`] if the final chunk is a FORM rather than a leaf.
196    pub fn replace_leaf(&mut self, path: &[usize], new_data: Vec<u8>) -> Result<(), MutError> {
197        let chunk = self.chunk_at_path_mut(path)?;
198        match chunk {
199            Chunk::Leaf { data, .. } => {
200                *data = new_data;
201                self.dirty = true;
202                Ok(())
203            }
204            Chunk::Form { .. } => Err(MutError::NotALeaf),
205        }
206    }
207
208    /// Return the chunk at `path` for inspection (without mutation).
209    pub fn chunk_at_path(&self, path: &[usize]) -> Result<&Chunk, MutError> {
210        if path.is_empty() {
211            return Err(MutError::EmptyPath);
212        }
213        let mut current = &self.file.root;
214        for (depth, &idx) in path.iter().enumerate() {
215            let children = current.children();
216            if children.is_empty() && depth < path.len() - 1 {
217                // We're inside a leaf but the path keeps going.
218                return Err(MutError::PathTraversesLeaf {
219                    depth,
220                    len: path.len(),
221                });
222            }
223            if let Chunk::Leaf { .. } = current {
224                return Err(MutError::PathTraversesLeaf {
225                    depth,
226                    len: path.len(),
227                });
228            }
229            if idx >= children.len() {
230                return Err(MutError::PathOutOfRange {
231                    index: idx,
232                    depth,
233                    len: children.len(),
234                });
235            }
236            current = &children[idx];
237        }
238        Ok(current)
239    }
240
241    fn chunk_at_path_mut(&mut self, path: &[usize]) -> Result<&mut Chunk, MutError> {
242        if path.is_empty() {
243            return Err(MutError::EmptyPath);
244        }
245        // Validate path first using the immutable walk.  This avoids the
246        // borrow-checker dance of validating during a mutable walk.
247        let _ = self.chunk_at_path(path)?;
248        // Now walk for real with `&mut`.
249        let mut current = &mut self.file.root;
250        for &idx in path {
251            // Validation above guarantees the indices are in range and that
252            // we never index into a leaf, so this match is total.
253            match current {
254                Chunk::Form { children, .. } => {
255                    current = &mut children[idx];
256                }
257                Chunk::Leaf { .. } => unreachable!("validated by chunk_at_path"),
258            }
259        }
260        Ok(current)
261    }
262
263    /// Whether any mutation has been applied since `from_bytes`.
264    pub fn is_dirty(&self) -> bool {
265        self.dirty
266    }
267
268    /// Serialise the document back to bytes.
269    ///
270    /// When [`Self::is_dirty`] is `false`, this returns the bytes passed to
271    /// [`Self::from_bytes`] verbatim. After any mutation it falls through to
272    /// [`iff::emit`] which reconstructs the IFF stream from the parsed tree;
273    /// for `FORM:DJVM` bundles the `DIRM` offsets are recomputed first so
274    /// they point at the correct component positions in the new output.
275    ///
276    /// # Panics
277    ///
278    /// Panics if `DIRM` offset recomputation fails — this only happens on a
279    /// structurally inconsistent document (DIRM `nfiles` not matching the
280    /// bundle's child count, etc.) which a successful [`Self::from_bytes`]
281    /// would already have rejected. Use [`Self::try_into_bytes`] to recover
282    /// the error without panicking.
283    pub fn into_bytes(self) -> Vec<u8> {
284        self.try_into_bytes()
285            .expect("DIRM recomputation failed — inconsistent document")
286    }
287
288    /// Like [`Self::into_bytes`] but returns the [`MutError`] from `DIRM`
289    /// offset recomputation rather than panicking.
290    pub fn try_into_bytes(mut self) -> Result<Vec<u8>, MutError> {
291        if !self.dirty {
292            return Ok(self.original_bytes);
293        }
294        recompute_dirm_offsets(&mut self.file.root)?;
295        Ok(iff::emit(&self.file))
296    }
297
298    // ---- High-level setters (PR2 of #222) ----------------------------------
299
300    /// Number of editable pages in the document.
301    ///
302    /// `1` for `FORM:DJVU`, the count of `FORM:DJVU` children for `FORM:DJVM`
303    /// (shared-dictionary `FORM:DJVI` components are not counted as pages).
304    pub fn page_count(&self) -> usize {
305        match self.root_form_type() {
306            Some(b"DJVM") => self
307                .file
308                .root
309                .children()
310                .iter()
311                .filter(
312                    |c| matches!(c, Chunk::Form { secondary_id, .. } if secondary_id == b"DJVU"),
313                )
314                .count(),
315            _ => 1,
316        }
317    }
318
319    /// Borrow the i-th page's `FORM:DJVU` for high-level mutation.
320    ///
321    /// For single-page `FORM:DJVU` only `index == 0` is valid. For bundled
322    /// `FORM:DJVM` the index walks `FORM:DJVU` direct children in order
323    /// (shared-dictionary `FORM:DJVI` components are skipped).
324    ///
325    /// On serialisation, [`Self::into_bytes`] rewrites DIRM offsets to
326    /// reflect any size changes from page mutations.
327    ///
328    /// # Errors
329    ///
330    /// - [`MutError::PageOutOfRange`] if `index >= self.page_count()`.
331    /// - [`MutError::IndirectDjvmUnsupported`] if the document is an
332    ///   indirect (non-bundled) `FORM:DJVM` — page bytes live in external
333    ///   files, so editing in place is not supported by this primitive.
334    pub fn page_mut(&mut self, index: usize) -> Result<PageMut<'_>, MutError> {
335        let count = self.page_count();
336        if index >= count {
337            return Err(MutError::PageOutOfRange { index, count });
338        }
339        let root_form_type = *self.root_form_type().expect("from_bytes validated FORM");
340        if &root_form_type == b"DJVU" {
341            debug_assert_eq!(index, 0);
342            return Ok(PageMut {
343                form: &mut self.file.root,
344                dirty: &mut self.dirty,
345            });
346        }
347        debug_assert_eq!(&root_form_type, b"DJVM");
348        if !is_bundled_djvm(&self.file.root) {
349            return Err(MutError::IndirectDjvmUnsupported);
350        }
351        // Walk the root's children, returning the index-th FORM:DJVU.
352        let children = match &mut self.file.root {
353            Chunk::Form { children, .. } => children,
354            Chunk::Leaf { .. } => unreachable!("validated FORM root"),
355        };
356        let mut seen = 0usize;
357        for child in children.iter_mut() {
358            if let Chunk::Form { secondary_id, .. } = child
359                && secondary_id == b"DJVU"
360            {
361                if seen == index {
362                    return Ok(PageMut {
363                        form: child,
364                        dirty: &mut self.dirty,
365                    });
366                }
367                seen += 1;
368            }
369        }
370        unreachable!("page_count agreed with bundle but iteration disagreed")
371    }
372
373    /// Replace, insert, or remove the document's `NAVM` bookmark chunk.
374    ///
375    /// Empty `bookmarks` removes any existing NAVM. The chunk lives at the
376    /// `FORM:DJVM` bundle root, between `DIRM` and the per-page components,
377    /// and the encoder uses [`encode_navm`].
378    ///
379    /// # Errors
380    ///
381    /// - [`MutError::BookmarksRequireDjvm`] if the document is a single-page
382    ///   `FORM:DJVU` (no NAVM in non-bundled documents per the DjVu spec).
383    pub fn set_bookmarks(&mut self, bookmarks: &[DjVuBookmark]) -> Result<(), MutError> {
384        let root_form_type = *self.root_form_type().expect("from_bytes validated FORM");
385        if &root_form_type != b"DJVM" {
386            return Err(MutError::BookmarksRequireDjvm);
387        }
388        let children = match &mut self.file.root {
389            Chunk::Form { children, .. } => children,
390            Chunk::Leaf { .. } => unreachable!("validated FORM root"),
391        };
392        let pos = children
393            .iter()
394            .position(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"NAVM"));
395        match (pos, bookmarks.is_empty()) {
396            (Some(i), true) => {
397                children.remove(i);
398            }
399            (Some(i), false) => {
400                children[i] = Chunk::Leaf {
401                    id: *b"NAVM",
402                    data: encode_navm(bookmarks),
403                };
404            }
405            (None, true) => { /* nothing to remove and nothing to insert */ }
406            (None, false) => {
407                // Insert NAVM right after DIRM if present, else right after
408                // the secondary id (i.e. as the first child). DIRM is the
409                // first chunk in a well-formed bundle.
410                let dirm_pos = children
411                    .iter()
412                    .position(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"DIRM"));
413                let insert_at = dirm_pos.map(|i| i + 1).unwrap_or(0);
414                children.insert(
415                    insert_at,
416                    Chunk::Leaf {
417                        id: *b"NAVM",
418                        data: encode_navm(bookmarks),
419                    },
420                );
421            }
422        }
423        self.dirty = true;
424        Ok(())
425    }
426}
427
428/// Whether `chunk` is a bundled (rather than indirect) `FORM:DJVM`.
429///
430/// Returns `false` for any non-DJVM chunk.
431fn is_bundled_djvm(chunk: &Chunk) -> bool {
432    let Chunk::Form {
433        secondary_id,
434        children,
435        ..
436    } = chunk
437    else {
438        return false;
439    };
440    if secondary_id != b"DJVM" {
441        return false;
442    }
443    children.iter().any(|c| {
444        matches!(c, Chunk::Leaf { id, data } if id == b"DIRM" && !data.is_empty() && (data[0] & 0x80) != 0)
445    })
446}
447
448/// Compute the byte length the chunk will occupy when emitted by [`iff::emit`]:
449/// 8-byte chunk header + payload + word-alignment padding.
450///
451/// For `FORM` chunks the payload is recomputed recursively (4 bytes for
452/// secondary_id + sum of children's emitted sizes), to mirror what
453/// [`iff::emit`] writes after a tree mutation.
454fn emitted_chunk_size(chunk: &Chunk) -> usize {
455    match chunk {
456        Chunk::Form {
457            secondary_id: _,
458            children,
459            ..
460        } => {
461            let payload: usize = 4 + children.iter().map(emitted_chunk_size).sum::<usize>();
462            let total = 8 + payload;
463            total + (total & 1)
464        }
465        Chunk::Leaf { data, .. } => {
466            let total = 8 + data.len();
467            total + (total & 1)
468        }
469    }
470}
471
472/// Recompute the absolute byte offsets stored in the `DIRM` chunk so they
473/// point at each `FORM:DJVU`/`FORM:DJVI` component in the about-to-be-emitted
474/// document.
475///
476/// Offsets in DIRM are absolute file-byte positions (from the leading
477/// `b"AT&T"` magic) of each component's outer `b"FORM"` chunk header. After a
478/// page-chunk mutation those positions shift, and viewers that use DIRM for
479/// page navigation see the wrong bytes if the table is not refreshed.
480///
481/// No-op for non-DJVM roots and for indirect DIRM (no offset table).
482fn recompute_dirm_offsets(root: &mut Chunk) -> Result<(), MutError> {
483    let Chunk::Form {
484        secondary_id,
485        children,
486        ..
487    } = root
488    else {
489        return Ok(());
490    };
491    if secondary_id != b"DJVM" {
492        return Ok(());
493    }
494
495    // Absolute byte position of the next chunk inside the FORM:DJVM body:
496    // AT&T(4) + FORM(4) + length(4) + secondary_id "DJVM"(4) = 16.
497    let mut pos: usize = 16;
498    let mut new_offsets: Vec<u32> = Vec::new();
499    let mut dirm_idx: Option<usize> = None;
500
501    // The `id == b"DIRM"` guard form is needed: `id` is `[u8; 4]` reached
502    // through a `&` reference, so a by-value pattern would require `*b"DIRM"`
503    // which clippy's redundant-guards autofix doesn't propose.
504    #[allow(clippy::redundant_guards)]
505    for (i, child) in children.iter().enumerate() {
506        match child {
507            Chunk::Leaf { id, .. } if id == b"DIRM" => {
508                dirm_idx = Some(i);
509            }
510            Chunk::Form {
511                secondary_id: sid, ..
512            } if sid == b"DJVU" || sid == b"DJVI" || sid == b"THUM" => {
513                new_offsets.push(u32::try_from(pos).map_err(|_| {
514                    MutError::DirmMalformed("component offset exceeds u32 (file > 4 GiB)")
515                })?);
516            }
517            _ => {}
518        }
519        pos += emitted_chunk_size(child);
520    }
521
522    let Some(dirm_idx) = dirm_idx else {
523        // Bundled DJVM with no DIRM is malformed by spec, but tolerate it
524        // (parse_dirm would have failed during from_bytes if it mattered).
525        return Ok(());
526    };
527
528    let dirm = &mut children[dirm_idx];
529    let Chunk::Leaf { data, .. } = dirm else {
530        return Err(MutError::DirmMalformed("DIRM is not a leaf chunk"));
531    };
532
533    if data.len() < 3 {
534        return Err(MutError::DirmMalformed("DIRM payload < 3 bytes"));
535    }
536    let bundled = (data[0] & 0x80) != 0;
537    if !bundled {
538        // Indirect DIRM has no offset table to update.
539        return Ok(());
540    }
541    let nfiles = u16::from_be_bytes([data[1], data[2]]) as usize;
542    if nfiles != new_offsets.len() {
543        return Err(MutError::DirmComponentCountMismatch {
544            dirm: nfiles,
545            children: new_offsets.len(),
546        });
547    }
548    let needed = 3usize
549        .checked_add(4 * nfiles)
550        .ok_or(MutError::DirmMalformed("DIRM offset table size overflow"))?;
551    if data.len() < needed {
552        return Err(MutError::DirmMalformed("DIRM offset table truncated"));
553    }
554    for (i, &off) in new_offsets.iter().enumerate() {
555        let base = 3 + i * 4;
556        data[base..base + 4].copy_from_slice(&off.to_be_bytes());
557    }
558    Ok(())
559}
560
/// A mutable handle to one page's `FORM:DJVU` chunk inside a
/// [`DjVuDocumentMut`]. Returned by [`DjVuDocumentMut::page_mut`].
///
/// Each setter replaces the corresponding chunk in place, or appends a new
/// chunk if the page does not have one yet. The compressed `*z` chunk variant
/// is preferred on insert (TXTz / ANTz / METz) for size; if an existing
/// uncompressed `*a` chunk is present, the setter replaces *that* chunk and
/// upgrades its identifier to the `*z` form.
pub struct PageMut<'doc> {
    /// The page's FORM chunk: the document root for a single-page file, or
    /// one `FORM:DJVU` child of the root for a bundle.
    form: &'doc mut Chunk,
    /// The owning document's dirty flag, raised by the setters.
    dirty: &'doc mut bool,
}
573
574impl PageMut<'_> {
575    /// Replace (or insert) the page's text layer with the BZZ-compressed
576    /// `TXTz` form of `layer`. Page height is read from the page's `INFO`
577    /// chunk; missing INFO yields [`MutError::MissingPageInfo`].
578    pub fn set_text_layer(&mut self, layer: &TextLayer) -> Result<(), MutError> {
579        let info_data = self
580            .find_leaf_data(b"INFO")
581            .ok_or(MutError::MissingPageInfo)?;
582        let info = PageInfo::parse(info_data)?;
583        let plain = encode_text_layer(layer, info.height as u32);
584        let compressed = crate::bzz_encode::bzz_encode(&plain);
585        self.replace_or_insert_text(compressed);
586        *self.dirty = true;
587        Ok(())
588    }
589
590    /// Replace (or insert) the page's annotation chunk with the
591    /// BZZ-compressed `ANTz` form of `(annotation, areas)`.
592    pub fn set_annotations(&mut self, annotation: &Annotation, areas: &[MapArea]) {
593        let bytes = encode_annotations_bzz(annotation, areas);
594        self.replace_or_insert(b"ANTa", b"ANTz", bytes);
595        *self.dirty = true;
596    }
597
598    /// Replace (or insert) the page's metadata chunk with the
599    /// BZZ-compressed `METz` form of `meta`. An empty `meta` value removes
600    /// any existing METa/METz chunk.
601    pub fn set_metadata(&mut self, meta: &DjVuMetadata) {
602        let bytes = encode_metadata_bzz(meta);
603        self.replace_or_insert(b"METa", b"METz", bytes);
604        *self.dirty = true;
605    }
606
607    fn find_leaf_data(&self, id: &[u8; 4]) -> Option<&[u8]> {
608        for child in self.form.children() {
609            if let Chunk::Leaf { id: cid, data } = child
610                && cid == id
611            {
612                return Some(data);
613            }
614        }
615        None
616    }
617
618    /// Replace either the `*a` or `*z` variant of a chunk pair, picking `*z`
619    /// (compressed) for any newly inserted chunk. If `data` is empty, removes
620    /// the existing chunk (whichever variant is present) and does not insert.
621    fn replace_or_insert(&mut self, id_a: &[u8; 4], id_z: &[u8; 4], data: Vec<u8>) {
622        let children = match self.form {
623            Chunk::Form { children, .. } => children,
624            Chunk::Leaf { .. } => unreachable!("PageMut wraps a FORM"),
625        };
626        let pos = children
627            .iter()
628            .position(|c| matches!(c, Chunk::Leaf { id, .. } if id == id_a || id == id_z));
629        match (pos, data.is_empty()) {
630            (Some(i), true) => {
631                children.remove(i);
632            }
633            (Some(i), false) => {
634                children[i] = Chunk::Leaf { id: *id_z, data };
635            }
636            (None, true) => { /* nothing to remove and nothing to insert */ }
637            (None, false) => {
638                children.push(Chunk::Leaf { id: *id_z, data });
639            }
640        }
641    }
642
643    /// TXTa / TXTz variant of `replace_or_insert` (kept separate for clarity).
644    fn replace_or_insert_text(&mut self, data: Vec<u8>) {
645        self.replace_or_insert(b"TXTa", b"TXTz", data);
646    }
647}
648
649#[cfg(test)]
650#[allow(clippy::field_reassign_with_default)]
651mod tests {
652    use super::*;
653    use std::path::PathBuf;
654
655    fn corpus_path(name: &str) -> PathBuf {
656        let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
657        p.push("tests/fixtures");
658        p.push(name);
659        p
660    }
661
662    fn read_corpus(name: &str) -> Vec<u8> {
663        std::fs::read(corpus_path(name)).expect("corpus fixture missing")
664    }
665
666    /// Round-trip without edits is byte-identical on a single-page document.
667    #[test]
668    fn roundtrip_byte_identical_chicken() {
669        let original = read_corpus("chicken.djvu");
670        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
671        assert!(!doc.is_dirty());
672        assert_eq!(doc.into_bytes(), original);
673    }
674
675    /// Round-trip without edits is byte-identical on a bilevel JB2 document.
676    #[test]
677    fn roundtrip_byte_identical_boy_jb2() {
678        let original = read_corpus("boy_jb2.djvu");
679        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
680        assert_eq!(doc.into_bytes(), original);
681    }
682
683    /// Round-trip without edits is byte-identical on a multi-page DJVM bundle.
684    #[test]
685    fn roundtrip_byte_identical_djvm_bundle() {
686        let original = read_corpus("DjVu3Spec_bundled.djvu");
687        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
688        assert_eq!(doc.root_form_type(), Some(b"DJVM"));
689        assert_eq!(doc.into_bytes(), original);
690    }
691
692    /// Round-trip without edits is byte-identical on a navm/fgbz document.
693    #[test]
694    fn roundtrip_byte_identical_navm() {
695        let original = read_corpus("navm_fgbz.djvu");
696        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
697        assert_eq!(doc.into_bytes(), original);
698    }
699
700    /// `replace_leaf` mutates in place and the serialised output reflects it.
701    #[test]
702    fn replace_leaf_changes_emitted_bytes() {
703        let original = read_corpus("chicken.djvu");
704        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
705
706        // Walk to the first leaf — for chicken.djvu (FORM:DJVU) this is INFO.
707        let first = doc.chunk_at_path(&[0]).unwrap();
708        let original_first_data = first.data().to_vec();
709        assert!(!original_first_data.is_empty());
710
711        // Replace with a marker and serialise.
712        let marker = b"PR1_TEST_MARKER".to_vec();
713        doc.replace_leaf(&[0], marker.clone()).unwrap();
714        assert!(doc.is_dirty());
715
716        let edited = doc.into_bytes();
717
718        // Re-parse the edited bytes and confirm the leaf payload changed.
719        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
720        let new_first = reparsed.chunk_at_path(&[0]).unwrap();
721        assert_eq!(new_first.data(), marker.as_slice());
722    }
723
724    #[test]
725    fn replace_leaf_rejects_empty_path() {
726        let original = read_corpus("chicken.djvu");
727        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
728        let err = doc.replace_leaf(&[], vec![]).unwrap_err();
729        assert!(matches!(err, MutError::EmptyPath));
730    }
731
732    #[test]
733    fn replace_leaf_rejects_out_of_range() {
734        let original = read_corpus("chicken.djvu");
735        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
736        let err = doc.replace_leaf(&[9999], vec![]).unwrap_err();
737        assert!(matches!(err, MutError::PathOutOfRange { .. }));
738    }
739
740    #[test]
741    fn replace_leaf_rejects_traversing_leaf() {
742        let original = read_corpus("chicken.djvu");
743        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
744        // [0] is a leaf (INFO).  [0, 0] tries to descend past it.
745        let err = doc.replace_leaf(&[0, 0], vec![]).unwrap_err();
746        assert!(matches!(err, MutError::PathTraversesLeaf { .. }));
747    }
748
749    #[test]
750    fn replace_leaf_rejects_form_target() {
751        // For a DJVM bundle, [N] for some N points at a FORM:DJVU page,
752        // not a leaf.  Picking the last child of DjVu3Spec_bundled (which
753        // is a page FORM) demonstrates NotALeaf.
754        let original = read_corpus("DjVu3Spec_bundled.djvu");
755        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
756        let last_idx = doc.root_child_count() - 1;
757        let err = doc.replace_leaf(&[last_idx], vec![]).unwrap_err();
758        assert!(matches!(err, MutError::NotALeaf));
759    }
760
761    #[test]
762    fn root_form_type_djvu_single_page() {
763        let original = read_corpus("chicken.djvu");
764        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
765        assert_eq!(doc.root_form_type(), Some(b"DJVU"));
766    }
767
768    // ---- PR2 setters ------------------------------------------------------
769
770    #[test]
771    fn page_count_single_page_djvu_is_one() {
772        let original = read_corpus("chicken.djvu");
773        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
774        assert_eq!(doc.page_count(), 1);
775    }
776
777    #[test]
778    fn page_count_djvm_bundle_counts_djvu_components_only() {
779        let original = read_corpus("DjVu3Spec_bundled.djvu");
780        let doc = DjVuDocumentMut::from_bytes(&original).unwrap();
781        // The bundle has multiple FORM:DJVU pages; assert it's > 1 and matches
782        // the count of DJVU children at the root.
783        let direct: usize = doc
784            .file
785            .root
786            .children()
787            .iter()
788            .filter(|c| {
789                matches!(c, crate::iff::Chunk::Form { secondary_id, .. } if secondary_id == b"DJVU")
790            })
791            .count();
792        assert!(direct >= 2, "expected multi-page bundle, got {direct}");
793        assert_eq!(doc.page_count(), direct);
794    }
795
796    #[test]
797    fn page_mut_out_of_range_errors() {
798        let original = read_corpus("chicken.djvu");
799        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
800        let err = doc.page_mut(1).err().unwrap();
801        assert!(matches!(
802            err,
803            MutError::PageOutOfRange { index: 1, count: 1 }
804        ));
805    }
806
807    #[test]
808    fn page_mut_djvm_bundle_succeeds_after_pr3() {
809        // PR3 enables page_mut on bundled FORM:DJVM. Verify it returns a
810        // valid handle for index 0 and rejects out-of-range indices.
811        let original = read_corpus("DjVu3Spec_bundled.djvu");
812        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
813        assert!(doc.page_mut(0).is_ok());
814        let count = doc.page_count();
815        let err = doc.page_mut(count).err().unwrap();
816        assert!(matches!(err, MutError::PageOutOfRange { .. }));
817    }
818
819    #[test]
820    fn set_text_layer_roundtrip_chicken() {
821        use crate::text::{Rect, TextLayer, TextZone, TextZoneKind};
822
823        let original = read_corpus("chicken.djvu");
824        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
825
826        let layer = TextLayer {
827            text: "hello world".to_string(),
828            zones: vec![TextZone {
829                kind: TextZoneKind::Page,
830                rect: Rect {
831                    x: 0,
832                    y: 0,
833                    width: 100,
834                    height: 50,
835                },
836                text: "hello world".to_string(),
837                children: vec![],
838            }],
839        };
840        doc.page_mut(0).unwrap().set_text_layer(&layer).unwrap();
841        assert!(doc.is_dirty());
842        let edited = doc.into_bytes();
843
844        // Re-parse and confirm a TXTz chunk now exists.
845        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
846        let has_txtz = reparsed
847            .file
848            .root
849            .children()
850            .iter()
851            .any(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"TXTz"));
852        assert!(
853            has_txtz,
854            "TXTz chunk should be present after set_text_layer"
855        );
856    }
857
858    #[test]
859    fn set_annotations_roundtrip_chicken() {
860        use crate::annotation::{Annotation, Color};
861
862        let original = read_corpus("chicken.djvu");
863        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
864
865        let mut ann = Annotation::default();
866        ann.background = Some(Color {
867            r: 0xFF,
868            g: 0xFF,
869            b: 0xFF,
870        });
871        ann.mode = Some("color".to_string());
872        doc.page_mut(0).unwrap().set_annotations(&ann, &[]);
873        let edited = doc.into_bytes();
874
875        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
876        let antz = reparsed
877            .file
878            .root
879            .children()
880            .iter()
881            .find(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"ANTz"));
882        assert!(antz.is_some(), "ANTz should be inserted");
883        let data = antz.unwrap().data();
884        let (parsed_ann, _areas) =
885            crate::annotation::parse_annotations_bzz(data).expect("ANTz must round-trip");
886        assert_eq!(parsed_ann.mode.as_deref(), Some("color"));
887        assert_eq!(
888            parsed_ann.background,
889            Some(Color {
890                r: 0xFF,
891                g: 0xFF,
892                b: 0xFF
893            })
894        );
895    }
896
897    #[test]
898    fn set_metadata_roundtrip_chicken() {
899        let original = read_corpus("chicken.djvu");
900        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
901
902        let mut meta = DjVuMetadata::default();
903        meta.title = Some("Test Title".into());
904        meta.author = Some("Tester".into());
905        doc.page_mut(0).unwrap().set_metadata(&meta);
906        let edited = doc.into_bytes();
907
908        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
909        let metz = reparsed
910            .file
911            .root
912            .children()
913            .iter()
914            .find(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"METz"))
915            .expect("METz should be inserted");
916        let parsed = crate::metadata::parse_metadata_bzz(metz.data()).unwrap();
917        assert_eq!(parsed, meta);
918    }
919
920    #[test]
921    fn set_metadata_empty_removes_existing_chunk() {
922        let original = read_corpus("chicken.djvu");
923        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
924
925        // Insert one, then clear.
926        let mut meta = DjVuMetadata::default();
927        meta.title = Some("X".into());
928        doc.page_mut(0).unwrap().set_metadata(&meta);
929        doc.page_mut(0)
930            .unwrap()
931            .set_metadata(&DjVuMetadata::default());
932
933        let edited = doc.into_bytes();
934        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
935        let any_meta = reparsed
936            .file
937            .root
938            .children()
939            .iter()
940            .any(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"METa" || id == b"METz"));
941        assert!(!any_meta, "set_metadata(empty) should remove any METa/METz");
942    }
943
944    #[test]
945    fn set_metadata_replaces_existing_chunk_in_place() {
946        let original = read_corpus("chicken.djvu");
947        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
948
949        let mut m1 = DjVuMetadata::default();
950        m1.title = Some("First".into());
951        doc.page_mut(0).unwrap().set_metadata(&m1);
952
953        let mut m2 = DjVuMetadata::default();
954        m2.title = Some("Second".into());
955        doc.page_mut(0).unwrap().set_metadata(&m2);
956
957        let edited = doc.into_bytes();
958        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
959        let metz_count = reparsed
960            .file
961            .root
962            .children()
963            .iter()
964            .filter(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"METa" || id == b"METz"))
965            .count();
966        assert_eq!(metz_count, 1, "should not duplicate METz on repeat set");
967    }
968
969    // ---- PR3: bundled DJVM mutation + set_bookmarks -----------------------
970
971    /// Helper: parse the FORM:DJVM body, return the DIRM chunk's offset table
972    /// and the actual file offsets where each component FORM header sits.
973    fn dirm_offsets_and_actual(data: &[u8]) -> (Vec<u32>, Vec<u32>) {
974        // Parse top-level FORM
975        let form = crate::iff::parse_form(data).expect("parse_form");
976        assert_eq!(&form.form_type, b"DJVM");
977
978        let dirm = form
979            .chunks
980            .iter()
981            .find(|c| &c.id == b"DIRM")
982            .expect("DIRM present");
983        let payload = dirm.data;
984        let nfiles = u16::from_be_bytes([payload[1], payload[2]]) as usize;
985        let mut declared = Vec::with_capacity(nfiles);
986        for i in 0..nfiles {
987            let base = 3 + i * 4;
988            declared.push(u32::from_be_bytes([
989                payload[base],
990                payload[base + 1],
991                payload[base + 2],
992                payload[base + 3],
993            ]));
994        }
995
996        // Walk the file to find each FORM child's absolute byte offset.
997        // Layout: AT&T(4) FORM(4) length(4) DJVM(4) chunks…
998        let mut actual = Vec::with_capacity(nfiles);
999        let mut pos = 16usize;
1000        let body_end = 8 + u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1001        while pos < body_end {
1002            let id = &data[pos..pos + 4];
1003            let len =
1004                u32::from_be_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
1005                    as usize;
1006            if id == b"FORM" {
1007                actual.push(pos as u32);
1008            }
1009            let mut next = pos + 8 + len;
1010            if next & 1 == 1 {
1011                next += 1;
1012            }
1013            pos = next;
1014        }
1015        (declared, actual)
1016    }
1017
1018    #[test]
1019    fn dirm_offsets_match_actual_after_no_edit() {
1020        // Sanity: even without edits, the recompute path agrees with the
1021        // original document layout on a real bundle.
1022        let original = read_corpus("DjVu3Spec_bundled.djvu");
1023        let (declared, actual) = dirm_offsets_and_actual(&original);
1024        assert_eq!(declared, actual);
1025    }
1026
1027    #[test]
1028    fn dirm_offsets_recomputed_after_page_metadata_edit() {
1029        let original = read_corpus("DjVu3Spec_bundled.djvu");
1030        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1031
1032        // Edit page 0's metadata so the page FORM grows.
1033        let mut meta = DjVuMetadata::default();
1034        meta.title = Some("PR3 DJVM bundled mutation".into());
1035        meta.author = Some("djvu-rs PR3 tests".into());
1036        doc.page_mut(0).unwrap().set_metadata(&meta);
1037        assert!(doc.is_dirty());
1038
1039        let edited = doc.into_bytes();
1040        // Sizes must have changed (metadata chunk was inserted).
1041        assert_ne!(edited.len(), original.len());
1042
1043        // DIRM offsets in the new bytes must match where the FORM headers
1044        // actually live.
1045        let (declared, actual) = dirm_offsets_and_actual(&edited);
1046        assert_eq!(
1047            declared, actual,
1048            "DIRM offsets must point at the new FORM positions after edit"
1049        );
1050
1051        // The full document must still parse via DjVuDocument and expose the
1052        // expected page count.
1053        let reparsed =
1054            crate::djvu_document::DjVuDocument::parse(&edited).expect("edited bundle must parse");
1055        let original_doc =
1056            crate::djvu_document::DjVuDocument::parse(&original).expect("original bundle parses");
1057        assert_eq!(reparsed.page_count(), original_doc.page_count());
1058    }
1059
1060    #[test]
1061    fn dirm_offsets_recomputed_after_middle_page_edit() {
1062        // Editing a non-first page must shift only the trailing offsets.
1063        let original = read_corpus("DjVu3Spec_bundled.djvu");
1064        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1065        let count = doc.page_count();
1066        assert!(count >= 3);
1067
1068        let mid = count / 2;
1069        let mut meta = DjVuMetadata::default();
1070        meta.title = Some("PR3 mid-page edit".into());
1071        doc.page_mut(mid).unwrap().set_metadata(&meta);
1072
1073        let edited = doc.into_bytes();
1074        let (declared, actual) = dirm_offsets_and_actual(&edited);
1075        assert_eq!(declared, actual);
1076
1077        // Pages before `mid` should have unchanged offsets vs. the original.
1078        let (orig_declared, _) = dirm_offsets_and_actual(&original);
1079        for i in 0..mid {
1080            assert_eq!(
1081                declared[i], orig_declared[i],
1082                "offset for page {i} (before edit) must be unchanged"
1083            );
1084        }
1085    }
1086
1087    #[test]
1088    fn set_bookmarks_replaces_navm_in_bundle() {
1089        use crate::djvu_document::DjVuBookmark;
1090
1091        let original = read_corpus("DjVu3Spec_bundled.djvu");
1092        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1093
1094        let bookmarks = vec![
1095            DjVuBookmark {
1096                title: "Front matter".into(),
1097                url: "#1".into(),
1098                children: vec![DjVuBookmark {
1099                    title: "Acknowledgments".into(),
1100                    url: "#3".into(),
1101                    children: vec![],
1102                }],
1103            },
1104            DjVuBookmark {
1105                title: "Body".into(),
1106                url: "#10".into(),
1107                children: vec![],
1108            },
1109        ];
1110        doc.set_bookmarks(&bookmarks).unwrap();
1111        assert!(doc.is_dirty());
1112        let edited = doc.into_bytes();
1113
1114        // DIRM offsets must still be correct after the NAVM size change.
1115        let (declared, actual) = dirm_offsets_and_actual(&edited);
1116        assert_eq!(declared, actual);
1117
1118        // Round-trip the bookmarks via the high-level DjVuDocument parser.
1119        let reparsed = crate::djvu_document::DjVuDocument::parse(&edited)
1120            .expect("bundle with new bookmarks parses");
1121        let parsed_bms = reparsed.bookmarks();
1122        assert_eq!(parsed_bms.len(), 2);
1123        assert_eq!(parsed_bms[0].title, "Front matter");
1124        assert_eq!(parsed_bms[0].children.len(), 1);
1125        assert_eq!(parsed_bms[0].children[0].title, "Acknowledgments");
1126        assert_eq!(parsed_bms[1].title, "Body");
1127    }
1128
1129    #[test]
1130    fn set_bookmarks_empty_removes_navm() {
1131        let original = read_corpus("DjVu3Spec_bundled.djvu");
1132        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1133        // The fixture might or might not have NAVM; either way, calling with
1134        // an empty slice should result in no NAVM in the output.
1135        doc.set_bookmarks(&[]).unwrap();
1136        let edited = doc.into_bytes();
1137
1138        let form = crate::iff::parse_form(&edited).unwrap();
1139        let has_navm = form.chunks.iter().any(|c| &c.id == b"NAVM");
1140        assert!(!has_navm, "set_bookmarks(&[]) must remove NAVM");
1141
1142        // DIRM offsets still match.
1143        let (declared, actual) = dirm_offsets_and_actual(&edited);
1144        assert_eq!(declared, actual);
1145    }
1146
1147    #[test]
1148    fn set_bookmarks_inserts_navm_when_absent() {
1149        use crate::djvu_document::DjVuBookmark;
1150
1151        // Build a bundle that has no NAVM by first stripping it, then
1152        // re-add bookmarks via set_bookmarks.
1153        let original = read_corpus("DjVu3Spec_bundled.djvu");
1154        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1155        doc.set_bookmarks(&[]).unwrap();
1156        let stripped = doc.into_bytes();
1157
1158        let mut doc = DjVuDocumentMut::from_bytes(&stripped).unwrap();
1159        let bms = vec![DjVuBookmark {
1160            title: "Re-added".into(),
1161            url: "#1".into(),
1162            children: vec![],
1163        }];
1164        doc.set_bookmarks(&bms).unwrap();
1165        let edited = doc.into_bytes();
1166
1167        let form = crate::iff::parse_form(&edited).unwrap();
1168        let navm_pos = form
1169            .chunks
1170            .iter()
1171            .position(|c| &c.id == b"NAVM")
1172            .expect("NAVM should be inserted");
1173        let dirm_pos = form.chunks.iter().position(|c| &c.id == b"DIRM").unwrap();
1174        assert_eq!(
1175            navm_pos,
1176            dirm_pos + 1,
1177            "NAVM should be placed immediately after DIRM"
1178        );
1179
1180        let (declared, actual) = dirm_offsets_and_actual(&edited);
1181        assert_eq!(declared, actual);
1182    }
1183
1184    #[test]
1185    fn set_bookmarks_on_single_page_djvu_errors() {
1186        let original = read_corpus("chicken.djvu");
1187        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1188        let err = doc.set_bookmarks(&[]).err().unwrap();
1189        assert!(matches!(err, MutError::BookmarksRequireDjvm));
1190    }
1191
1192    #[test]
1193    fn page_mut_djvm_text_layer_roundtrip() {
1194        use crate::text::{Rect, TextLayer, TextZone, TextZoneKind};
1195
1196        let original = read_corpus("DjVu3Spec_bundled.djvu");
1197        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1198        let layer = TextLayer {
1199            text: "djvm page-3 text".into(),
1200            zones: vec![TextZone {
1201                kind: TextZoneKind::Page,
1202                rect: Rect {
1203                    x: 0,
1204                    y: 0,
1205                    width: 100,
1206                    height: 50,
1207                },
1208                text: "djvm page-3 text".into(),
1209                children: vec![],
1210            }],
1211        };
1212        doc.page_mut(2).unwrap().set_text_layer(&layer).unwrap();
1213        let edited = doc.into_bytes();
1214
1215        let (declared, actual) = dirm_offsets_and_actual(&edited);
1216        assert_eq!(declared, actual);
1217
1218        // Re-open and confirm the targeted page now has a TXTz chunk.
1219        let reparsed = DjVuDocumentMut::from_bytes(&edited).unwrap();
1220        // The third FORM:DJVU child should have a TXTz leaf.
1221        let mut djvu_seen = 0usize;
1222        let mut found_txtz = false;
1223        for child in reparsed.file.root.children() {
1224            if let Chunk::Form {
1225                secondary_id,
1226                children,
1227                ..
1228            } = child
1229                && secondary_id == b"DJVU"
1230            {
1231                if djvu_seen == 2 {
1232                    found_txtz = children
1233                        .iter()
1234                        .any(|c| matches!(c, Chunk::Leaf { id, .. } if id == b"TXTz"));
1235                    break;
1236                }
1237                djvu_seen += 1;
1238            }
1239        }
1240        assert!(
1241            found_txtz,
1242            "TXTz chunk should be present on page 2 after set_text_layer"
1243        );
1244    }
1245
1246    /// PR4 of #222: editing one page in a bundled DJVM must leave every
1247    /// other page's bytes unchanged. The mutated page itself may grow
1248    /// (e.g. a new METz chunk), but unmutated FORM:DJVU/DJVI components
1249    /// must round-trip byte-identical.
1250    #[test]
1251    fn unmutated_pages_byte_identical_after_metadata_edit() {
1252        use crate::metadata::DjVuMetadata;
1253
1254        let original = read_corpus("DjVu3Spec_bundled.djvu");
1255
1256        let orig_ranges = top_form_ranges(&original);
1257
1258        let mut doc = DjVuDocumentMut::from_bytes(&original).unwrap();
1259        let meta = DjVuMetadata {
1260            title: Some("PR4 byte-identical probe".into()),
1261            ..Default::default()
1262        };
1263        doc.page_mut(0).unwrap().set_metadata(&meta);
1264        let edited = doc.into_bytes();
1265
1266        let edited_ranges = top_form_ranges(&edited);
1267        assert_eq!(orig_ranges.len(), edited_ranges.len());
1268
1269        // The first FORM:DJVU child corresponds to page 0 (the one we edited);
1270        // it is allowed to differ. All others must be byte-identical.
1271        let mut djvu_idx = 0usize;
1272        for (i, (or, er)) in orig_ranges.iter().zip(edited_ranges.iter()).enumerate() {
1273            // Only enforce identity on FORM:DJVU/DJVI components — bare leaves
1274            // (DIRM, NAVM) legitimately change when offsets shift.
1275            let is_form_djvu = &original[or.start..or.start + 4] == b"FORM"
1276                && (&original[or.start + 8..or.start + 12] == b"DJVU"
1277                    || &original[or.start + 8..or.start + 12] == b"DJVI");
1278            if !is_form_djvu {
1279                continue;
1280            }
1281            let is_edited_page = djvu_idx == 0;
1282            djvu_idx += 1;
1283            if is_edited_page {
1284                continue;
1285            }
1286            assert_eq!(
1287                &original[or.clone()],
1288                &edited[er.clone()],
1289                "FORM at top-level child #{i} must be byte-identical after edit"
1290            );
1291        }
1292    }
1293
1294    /// Walk top-level children of the outer FORM and return their absolute
1295    /// byte ranges (header+payload+pad).
1296    fn top_form_ranges(data: &[u8]) -> Vec<core::ops::Range<usize>> {
1297        assert_eq!(&data[..4], b"AT&T");
1298        let form_len = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1299        let body_end = 12 + form_len;
1300        let mut pos = 16usize; // skip AT&T(4) + FORM(4) + len(4) + secondary_id(4)
1301        let mut out = Vec::new();
1302        while pos + 8 <= body_end {
1303            let len =
1304                u32::from_be_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
1305                    as usize;
1306            let mut next = pos + 8 + len;
1307            if next & 1 == 1 && next < body_end {
1308                next += 1;
1309            }
1310            out.push(pos..next);
1311            pos = next;
1312        }
1313        out
1314    }
1315}