Skip to main content

djvu_rs/
text.rs

1//! DjVu text layer parser — phase 4.
2//!
3//! Parses TXTa (plain) and TXTz (BZZ-compressed) text layer chunks into a
4//! structured zone hierarchy with remapped coordinates.
5//!
6//! ## Key public types
7//!
8//! - [`TextLayer`] — the full text content and zone hierarchy of a page
9//! - [`TextZone`] — a single zone node (page/column/para/line/word/char)
10//! - [`TextZoneKind`] — enum discriminating zone types
11//! - `Rect` — bounding rectangle in top-left-origin coordinates
12//! - `TextError` — typed errors from this module
13//!
14//! ## Format notes
15//!
16//! The TXTa/TXTz binary format stores:
17//!   `[u24be text_len][utf8 text][u8 version][zone tree]`
18//!
19//! Zone coordinates use DjVu's bottom-left origin. This module remaps all
20//! coordinates to a top-left origin using the provided page height.
21//!
22//! Zone fields are delta-encoded relative to a parent or previous sibling.
23
24#[cfg(not(feature = "std"))]
25use alloc::{
26    string::{String, ToString},
27    vec::Vec,
28};
29
30use crate::{bzz_new::bzz_decode, error::BzzError, info::Rotation};
31
32// ---- Error ------------------------------------------------------------------
33
/// Errors from text layer parsing.
///
/// Returned by [`parse_text_layer`] and [`parse_text_layer_bzz`].
#[derive(Debug, thiserror::Error)]
pub enum TextError {
    /// BZZ decompression failed.
    ///
    /// Only produced by the TXTz (compressed) entry point; wraps the
    /// underlying decoder error.
    #[error("bzz decode failed: {0}")]
    Bzz(#[from] BzzError),

    /// The binary data is too short to be a valid text layer.
    ///
    /// Raised when the 3-byte text length prefix cannot be read.
    #[error("text layer data too short")]
    TooShort,

    /// A text length field points past the end of the data.
    #[error("text length overflows data")]
    TextOverflow,

    /// The text bytes are not valid UTF-8.
    #[error("invalid UTF-8 in text layer")]
    InvalidUtf8,

    /// A zone record is truncated (not enough bytes for a field).
    ///
    /// The payload is the byte offset at which the missing field was expected.
    #[error("zone record truncated at offset {0}")]
    ZoneTruncated(usize),

    /// An unknown zone type byte was encountered.
    ///
    /// The payload is the offending byte; valid zone types are 1 through 7.
    #[error("unknown zone type {0}")]
    UnknownZoneType(u8),
}
61
62// ---- Public types -----------------------------------------------------------
63
/// Zone type discriminant in the DjVu text layer hierarchy.
///
/// Each variant corresponds to one zone type byte in the encoded zone tree
/// (1 = `Page` through 7 = `Character`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TextZoneKind {
    /// Page zone (type byte 1).
    Page,
    /// Column zone (type byte 2).
    Column,
    /// Region zone (type byte 3).
    Region,
    /// Paragraph zone (type byte 4).
    Para,
    /// Line zone (type byte 5).
    Line,
    /// Word zone (type byte 6).
    Word,
    /// Character zone (type byte 7).
    Character,
}
76
/// Bounding rectangle in top-left-origin coordinates (pixels).
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Rect {
    /// Horizontal offset of the rectangle's left edge.
    pub x: u32,
    /// Vertical offset of the rectangle's top edge (origin is top-left).
    pub y: u32,
    /// Horizontal extent.
    pub width: u32,
    /// Vertical extent.
    pub height: u32,
}
86
/// A single node in the text zone hierarchy.
///
/// Zones form a tree: every node owns its `children`, and each node carries
/// its own copy of the text it covers.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextZone {
    /// Zone type.
    pub kind: TextZoneKind,
    /// Bounding box (top-left origin, after coordinate remap).
    pub rect: Rect,
    /// Text covered by this zone (substring of [`TextLayer::text`]).
    pub text: String,
    /// Child zones (columns inside page, words inside line, etc.).
    pub children: Vec<TextZone>,
}
100
/// The complete text layer of a DjVu page.
///
/// Produced by [`parse_text_layer`] / [`parse_text_layer_bzz`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextLayer {
    /// Full plain-text content of the page, UTF-8.
    pub text: String,
    /// Top-level zone nodes (usually a single `Page` zone).
    ///
    /// Empty when the chunk contains text but no zone tree.
    pub zones: Vec<TextZone>,
}
110
111impl TextLayer {
112    /// Return a copy of this text layer with all zone rectangles transformed to
113    /// match a rendered page of size `render_w × render_h`.
114    ///
115    /// - `page_w`, `page_h` — native page dimensions from the INFO chunk.
116    /// - `rotation` — page rotation from the INFO chunk.
117    /// - `render_w`, `render_h` — the pixel size of the rendered output.
118    ///
119    /// Applies rotation first (in native pixel space), then scales the result
120    /// proportionally to the requested render size.  The text content is
121    /// preserved unchanged.
122    pub fn transform(
123        &self,
124        page_w: u32,
125        page_h: u32,
126        rotation: Rotation,
127        render_w: u32,
128        render_h: u32,
129    ) -> Self {
130        let (disp_w, disp_h) = match rotation {
131            Rotation::Cw90 | Rotation::Ccw90 => (page_h, page_w),
132            _ => (page_w, page_h),
133        };
134        let t = ZoneTransform {
135            page_w,
136            page_h,
137            rotation,
138            disp_w,
139            disp_h,
140            render_w,
141            render_h,
142        };
143        let zones = self.zones.iter().map(|z| transform_zone(z, &t)).collect();
144        TextLayer {
145            text: self.text.clone(),
146            zones,
147        }
148    }
149}
150
151// ---- Coordinate helpers -----------------------------------------------------
152
153impl Rect {
154    /// Rotate this rectangle within a `page_w × page_h` native coordinate space.
155    ///
156    /// Coordinates are in top-left origin.  Returns the transformed rect in the
157    /// rotated display space (which has dimensions `page_h × page_w` for 90°
158    /// rotations and `page_w × page_h` for 0°/180°).
159    pub fn rotate(&self, page_w: u32, page_h: u32, rotation: Rotation) -> Self {
160        match rotation {
161            Rotation::None => self.clone(),
162            Rotation::Rot180 => Rect {
163                x: page_w.saturating_sub(self.x.saturating_add(self.width)),
164                y: page_h.saturating_sub(self.y.saturating_add(self.height)),
165                width: self.width,
166                height: self.height,
167            },
168            // Clockwise 90°: displayed page is page_h wide × page_w tall.
169            // (x, y, w, h) → (page_h - y - h,  x,  h,  w)
170            Rotation::Cw90 => Rect {
171                x: page_h.saturating_sub(self.y.saturating_add(self.height)),
172                y: self.x,
173                width: self.height,
174                height: self.width,
175            },
176            // Counter-clockwise 90°: displayed page is page_h wide × page_w tall.
177            // (x, y, w, h) → (y,  page_w - x - w,  h,  w)
178            Rotation::Ccw90 => Rect {
179                x: self.y,
180                y: page_w.saturating_sub(self.x.saturating_add(self.width)),
181                width: self.height,
182                height: self.width,
183            },
184        }
185    }
186
187    /// Scale this rectangle from a `from_w × from_h` space to `to_w × to_h`.
188    pub fn scale(&self, from_w: u32, from_h: u32, to_w: u32, to_h: u32) -> Self {
189        if from_w == 0 || from_h == 0 {
190            return self.clone();
191        }
192        Rect {
193            x: (self.x as u64 * to_w as u64 / from_w as u64) as u32,
194            y: (self.y as u64 * to_h as u64 / from_h as u64) as u32,
195            width: (self.width as u64 * to_w as u64 / from_w as u64) as u32,
196            height: (self.height as u64 * to_h as u64 / from_h as u64) as u32,
197        }
198    }
199}
200
/// Parameters for `transform_zone` — groups the 7 invariants so we stay
/// under clippy's `too_many_arguments` limit.
///
/// Built once per [`TextLayer::transform`] call and shared by every zone in
/// the tree.
struct ZoneTransform {
    /// Native page width, used for rotation.
    page_w: u32,
    /// Native page height, used for rotation.
    page_h: u32,
    /// Rotation applied to every zone rectangle.
    rotation: Rotation,
    /// Width of the page after rotation (source width for scaling).
    disp_w: u32,
    /// Height of the page after rotation (source height for scaling).
    disp_h: u32,
    /// Target width for scaling.
    render_w: u32,
    /// Target height for scaling.
    render_h: u32,
}
212
213fn transform_zone(zone: &TextZone, t: &ZoneTransform) -> TextZone {
214    let rotated = zone.rect.rotate(t.page_w, t.page_h, t.rotation);
215    let scaled = rotated.scale(t.disp_w, t.disp_h, t.render_w, t.render_h);
216    let children = zone.children.iter().map(|c| transform_zone(c, t)).collect();
217    TextZone {
218        kind: zone.kind,
219        rect: scaled,
220        text: zone.text.clone(),
221        children,
222    }
223}
224
225// ---- Entry points -----------------------------------------------------------
226
227/// Parse a TXTa (uncompressed) text layer chunk.
228///
229/// `page_height` is used to remap DjVu bottom-left coordinates to top-left.
230pub fn parse_text_layer(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
231    parse_text_layer_inner(data, page_height)
232}
233
234/// Parse a TXTz (BZZ-compressed) text layer chunk.
235///
236/// Decompresses with BZZ first, then delegates to [`parse_text_layer`].
237pub fn parse_text_layer_bzz(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
238    let decoded = bzz_decode(data)?;
239    parse_text_layer_inner(&decoded, page_height)
240}
241
242// ---- Internal parsing -------------------------------------------------------
243
244fn parse_text_layer_inner(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
245    if data.len() < 3 {
246        return Err(TextError::TooShort);
247    }
248
249    let mut pos = 0usize;
250
251    // Read text length (u24be)
252    let text_len = read_u24(data, &mut pos).ok_or(TextError::TooShort)?;
253
254    // Read UTF-8 text
255    let text_end = pos.checked_add(text_len).ok_or(TextError::TextOverflow)?;
256    if text_end > data.len() {
257        return Err(TextError::TextOverflow);
258    }
259    let text = core::str::from_utf8(data.get(pos..text_end).ok_or(TextError::TextOverflow)?)
260        .map_err(|_| TextError::InvalidUtf8)?
261        .to_string();
262    pos = text_end;
263
264    // Consume version byte (if present)
265    if pos < data.len() {
266        pos += 1; // version byte — currently unused
267    }
268
269    // Parse zone tree
270    let mut zones = Vec::new();
271    if pos < data.len() {
272        let zone = parse_zone(data, &mut pos, None, None, &text, page_height)?;
273        zones.push(zone);
274    }
275
276    Ok(TextLayer { text, zones })
277}
278
279// ---- Zone parsing -----------------------------------------------------------
280
/// Delta-encoding context carried from one zone parse to the next.
///
/// Holds the resolved (absolute) values of a zone so that the fields of a
/// child or following sibling, which are stored as deltas, can be decoded.
#[derive(Clone)]
struct ZoneCtx {
    /// Absolute x of the zone.
    x: i32,
    y: i32, // bottom-left y (DjVu native)
    /// Zone width.
    width: i32,
    /// Zone height.
    height: i32,
    /// Absolute byte offset of the zone's text within the page text.
    text_start: i32,
    /// Byte length of the zone's text.
    text_len: i32,
}
291
292fn parse_zone(
293    data: &[u8],
294    pos: &mut usize,
295    parent: Option<&ZoneCtx>,
296    prev: Option<&ZoneCtx>,
297    full_text: &str,
298    page_height: u32,
299) -> Result<TextZone, TextError> {
300    if *pos >= data.len() {
301        return Err(TextError::ZoneTruncated(*pos));
302    }
303
304    let type_byte = *data.get(*pos).ok_or(TextError::ZoneTruncated(*pos))?;
305    *pos += 1;
306
307    let kind = match type_byte {
308        1 => TextZoneKind::Page,
309        2 => TextZoneKind::Column,
310        3 => TextZoneKind::Region,
311        4 => TextZoneKind::Para,
312        5 => TextZoneKind::Line,
313        6 => TextZoneKind::Word,
314        7 => TextZoneKind::Character,
315        other => return Err(TextError::UnknownZoneType(other)),
316    };
317
318    let mut x = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
319    let mut y = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
320    let width = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
321    let height = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
322    let mut text_start = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
323    let text_len = read_i24(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
324
325    // Apply delta encoding (matches djvujs DjVuText.js decodeZone logic)
326    if let Some(prev) = prev {
327        match type_byte {
328            1 | 4 | 5 => {
329                // PAGE, PARAGRAPH, LINE
330                x += prev.x;
331                y = prev.y - (y + height);
332            }
333            _ => {
334                // COLUMN, REGION, WORD, CHARACTER
335                x += prev.x + prev.width;
336                y += prev.y;
337            }
338        }
339        text_start += prev.text_start + prev.text_len;
340    } else if let Some(parent) = parent {
341        x += parent.x;
342        y = parent.y + parent.height - (y + height);
343        text_start += parent.text_start;
344    }
345
346    // Remap y from DjVu bottom-left to top-left
347    // top_left_y = page_height - (bl_y + height)
348    let tl_y = (page_height as i32)
349        .saturating_sub(y.saturating_add(height))
350        .max(0) as u32;
351    let tl_x = x.max(0) as u32;
352    let tl_w = width.max(0) as u32;
353    let tl_h = height.max(0) as u32;
354
355    let rect = Rect {
356        x: tl_x,
357        y: tl_y,
358        width: tl_w,
359        height: tl_h,
360    };
361
362    // Extract zone text
363    let ts = text_start.max(0) as usize;
364    let tl = text_len.max(0) as usize;
365    let zone_text = extract_text_slice(full_text, ts, tl);
366
367    let children_count = read_i24(data, pos)
368        .ok_or(TextError::ZoneTruncated(*pos))?
369        .max(0) as usize;
370
371    let ctx = ZoneCtx {
372        x,
373        y,
374        width,
375        height,
376        text_start,
377        text_len,
378    };
379
380    let mut children = Vec::with_capacity(children_count);
381    let mut prev_child: Option<ZoneCtx> = None;
382
383    for _ in 0..children_count {
384        let child = parse_zone(
385            data,
386            pos,
387            Some(&ctx),
388            prev_child.as_ref(),
389            full_text,
390            page_height,
391        )?;
392        prev_child = Some(ZoneCtx {
393            x: child.rect.x as i32,
394            y: {
395                // We need to store the original bottom-left y for delta calc.
396                // Inverse remap: bl_y = page_height - (tl_y + height)
397                (page_height as i32).saturating_sub(child.rect.y as i32 + child.rect.height as i32)
398            },
399            width: child.rect.width as i32,
400            height: child.rect.height as i32,
401            text_start: ts as i32,
402            text_len: tl as i32,
403        });
404        children.push(child);
405    }
406
407    Ok(TextZone {
408        kind,
409        rect,
410        text: zone_text,
411        children,
412    })
413}
414
/// Copy the byte range `start .. start + len` out of `full_text`.
///
/// The range is first clamped to the length of `full_text`.  If either end
/// then falls inside a multi-byte UTF-8 sequence, the range is widened
/// outward (start moves back, end moves forward) to the nearest char
/// boundary, so slicing can never panic.
fn extract_text_slice(full_text: &str, start: usize, len: usize) -> String {
    let total = full_text.len();
    let mut hi = start.saturating_add(len).min(total);
    let mut lo = start.min(hi);

    // Offset 0 is always a boundary, so this stops before underflowing.
    while !full_text.is_char_boundary(lo) {
        lo -= 1;
    }
    // `total` is always a boundary, so this stops at the end at the latest.
    while !full_text.is_char_boundary(hi) {
        hi += 1;
    }

    String::from(&full_text[lo..hi])
}
431
432// ---- Low-level readers (no indexing, no unwrap) -----------------------------
433
/// Decode a big-endian unsigned 24-bit integer at `*pos`.
///
/// On success `pos` is advanced by 3.  Returns `None` — leaving `pos`
/// unchanged — when fewer than three bytes remain.
fn read_u24(data: &[u8], pos: &mut usize) -> Option<usize> {
    let start = *pos;
    let end = start.checked_add(3)?;
    match data.get(start..end)? {
        &[hi, mid, lo] => {
            *pos = end;
            Some((usize::from(hi) << 16) | (usize::from(mid) << 8) | usize::from(lo))
        }
        _ => None,
    }
}
442
/// Decode a big-endian 16-bit value stored with a bias of `0x8000`.
///
/// The result is `raw - 0x8000`, i.e. in `-32768..=32767`.  On success `pos`
/// is advanced by 2.  Returns `None` — leaving `pos` unchanged — when fewer
/// than two bytes remain.
fn read_i16_biased(data: &[u8], pos: &mut usize) -> Option<i32> {
    let start = *pos;
    let end = start.checked_add(2)?;
    match data.get(start..end)? {
        &[hi, lo] => {
            *pos = end;
            let raw = (i32::from(hi) << 8) | i32::from(lo);
            Some(raw - 0x8000)
        }
        _ => None,
    }
}
451
/// Decode a big-endian 24-bit integer at `*pos` into an `i32`.
///
/// The three bytes are combined without sign extension, so the result is
/// always in `0..=0x00FF_FFFF`.  On success `pos` is advanced by 3.  Returns
/// `None` — leaving `pos` unchanged — when fewer than three bytes remain.
fn read_i24(data: &[u8], pos: &mut usize) -> Option<i32> {
    let start = *pos;
    let end = start.checked_add(3)?;
    match data.get(start..end)? {
        &[hi, mid, lo] => {
            *pos = end;
            Some((i32::from(hi) << 16) | (i32::from(mid) << 8) | i32::from(lo))
        }
        _ => None,
    }
}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `value` the way zone records store 16-bit fields (bias 0x8000).
    fn biased(value: i16) -> [u8; 2] {
        ((i32::from(value) + 0x8000) as u16).to_be_bytes()
    }

    // ── Low-level reader tests ──────────────────────────────────────────────

    #[test]
    fn test_read_u24() {
        let data = [0x01, 0x02, 0x03];
        let mut pos = 0;
        assert_eq!(read_u24(&data, &mut pos), Some(0x010203));
        assert_eq!(pos, 3);
    }

    #[test]
    fn test_read_u24_truncated() {
        let data = [0x01, 0x02];
        let mut pos = 0;
        assert_eq!(read_u24(&data, &mut pos), None);
    }

    #[test]
    fn test_read_i16_biased() {
        let data = [0x80, 0x00]; // 0x8000 - 0x8000 = 0
        let mut pos = 0;
        assert_eq!(read_i16_biased(&data, &mut pos), Some(0));
        assert_eq!(pos, 2);
    }

    #[test]
    fn test_read_i16_biased_negative() {
        let data = [0x00, 0x00]; // 0x0000 - 0x8000 = -32768
        let mut pos = 0;
        assert_eq!(read_i16_biased(&data, &mut pos), Some(-0x8000));
    }

    #[test]
    fn test_read_i16_biased_truncated() {
        let data = [0x80];
        let mut pos = 0;
        assert_eq!(read_i16_biased(&data, &mut pos), None);
    }

    #[test]
    fn test_read_i24() {
        let data = [0x00, 0x01, 0x00];
        let mut pos = 0;
        assert_eq!(read_i24(&data, &mut pos), Some(256));
        assert_eq!(pos, 3);
    }

    #[test]
    fn test_read_i24_is_not_sign_extended() {
        // The top bit of the first byte does not make the value negative.
        let data = [0xFF, 0xFF, 0xFF];
        let mut pos = 0;
        assert_eq!(read_i24(&data, &mut pos), Some(0x00FF_FFFF));
    }

    #[test]
    fn test_read_i24_truncated() {
        let data = [0x00, 0x01];
        let mut pos = 0;
        assert_eq!(read_i24(&data, &mut pos), None);
    }

    // ── extract_text_slice ──────────────────────────────────────────────────

    #[test]
    fn test_extract_text_slice_basic() {
        assert_eq!(extract_text_slice("hello world", 0, 5), "hello");
        assert_eq!(extract_text_slice("hello world", 6, 5), "world");
    }

    #[test]
    fn test_extract_text_slice_out_of_bounds() {
        assert_eq!(extract_text_slice("hello", 10, 5), "");
        assert_eq!(extract_text_slice("hello", 0, 100), "hello");
    }

    #[test]
    fn test_extract_text_slice_utf8_boundary() {
        // Multi-byte char: each char is 2 bytes
        let s = "\u{00e9}\u{00e8}"; // é è — 2 bytes each
        // Bytes 1..3 start and end mid-char; both ends snap outward to the
        // nearest boundary, so the whole string comes back.
        let result = extract_text_slice(s, 1, 2);
        assert_eq!(result, s);
    }

    #[test]
    fn test_extract_text_slice_empty() {
        assert_eq!(extract_text_slice("", 0, 0), "");
        assert_eq!(extract_text_slice("abc", 1, 0), "");
    }

    // ── Error paths ─────────────────────────────────────────────────────────

    #[test]
    fn test_too_short_data() {
        assert!(matches!(
            parse_text_layer(&[0x00], 100),
            Err(TextError::TooShort)
        ));
        assert!(matches!(
            parse_text_layer(&[], 100),
            Err(TextError::TooShort)
        ));
    }

    #[test]
    fn test_text_overflow() {
        // text_len = 0x00_00_FF (255) but only 3+1 bytes available
        let data = [0x00, 0x00, 0xFF, 0x41];
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::TextOverflow)
        ));
    }

    #[test]
    fn test_invalid_utf8() {
        // text_len = 2, then 2 invalid bytes
        let data = [0x00, 0x00, 0x02, 0xFF, 0xFE];
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::InvalidUtf8)
        ));
    }

    #[test]
    fn test_unknown_zone_type() {
        // text_len=1, text="A", version=0, then zone type=99 (invalid)
        let data = [
            0x00, 0x00, 0x01, // text_len = 1
            b'A', // text
            0x00, // version
            99,   // invalid zone type
        ];
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::UnknownZoneType(99))
        ));
    }

    #[test]
    fn test_zone_truncated() {
        // text_len=1, text="A", version=0, zone type=1 (Page), then truncated
        let data = [
            0x00, 0x00, 0x01, // text_len = 1
            b'A', // text
            0x00, // version
            0x01, // zone type = Page
            0x80, 0x00, // x (only partial fields)
        ];
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::ZoneTruncated(_))
        ));
    }

    // ── Successful parse ────────────────────────────────────────────────────

    #[test]
    fn test_empty_text_no_zones() {
        // text_len=0, no zones after that
        let data = [0x00, 0x00, 0x00];
        let result = parse_text_layer(&data, 100).unwrap();
        assert_eq!(result.text, "");
        assert!(result.zones.is_empty());
    }

    #[test]
    fn test_text_only_no_zones() {
        // text_len=5, text="Hello", version byte, then no zone data
        let data = [
            0x00, 0x00, 0x05, // text_len = 5
            b'H', b'e', b'l', b'l', b'o', // text
            0x00, // version
        ];
        let result = parse_text_layer(&data, 100).unwrap();
        assert_eq!(result.text, "Hello");
        assert!(result.zones.is_empty());
    }

    // ── TextLayer::transform ─────────────────────────────────────────────────

    fn make_layer(x: u32, y: u32, w: u32, h: u32) -> TextLayer {
        TextLayer {
            text: "test".to_string(),
            zones: vec![TextZone {
                kind: TextZoneKind::Page,
                rect: Rect {
                    x,
                    y,
                    width: w,
                    height: h,
                },
                text: "test".to_string(),
                children: vec![],
            }],
        }
    }

    fn rect0(layer: &TextLayer) -> &Rect {
        &layer.zones[0].rect
    }

    #[test]
    fn transform_none_identity() {
        // No rotation, 1:1 scale — rects unchanged
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::None, 100, 200);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 10,
                y: 20,
                width: 30,
                height: 40
            }
        );
    }

    #[test]
    fn transform_none_scale_2x() {
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::None, 200, 400);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 20,
                y: 40,
                width: 60,
                height: 80
            }
        );
    }

    #[test]
    fn transform_rot180() {
        // page 100×200, rect (10, 20, 30, 40)
        // new_x = 100 - 10 - 30 = 60
        // new_y = 200 - 20 - 40 = 140
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::Rot180, 100, 200);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 60,
                y: 140,
                width: 30,
                height: 40
            }
        );
    }

    #[test]
    fn transform_cw90() {
        // page 100×200, rect (x=10, y=20, w=30, h=40)
        // displayed: 200 wide × 100 tall
        // new_x = page_h - y - h = 200 - 20 - 40 = 140
        // new_y = x = 10
        // new_w = h = 40,  new_h = w = 30
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::Cw90, 200, 100);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 140,
                y: 10,
                width: 40,
                height: 30
            }
        );
    }

    #[test]
    fn transform_ccw90() {
        // page 100×200, rect (x=10, y=20, w=30, h=40)
        // displayed: 200 wide × 100 tall
        // new_x = y = 20
        // new_y = page_w - x - w = 100 - 10 - 30 = 60
        // new_w = h = 40,  new_h = w = 30
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::Ccw90, 200, 100);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 20,
                y: 60,
                width: 40,
                height: 30
            }
        );
    }

    #[test]
    fn transform_cw90_then_scale() {
        // page 100×200, rect (10, 20, 30, 40), render at 2× (400×200)
        // After Cw90: (140, 10, 40, 30) in 200×100 space
        // Scale ×2: (280, 20, 80, 60)
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::Cw90, 400, 200);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 280,
                y: 20,
                width: 80,
                height: 60
            }
        );
    }

    #[test]
    fn transform_text_preserved() {
        let layer = make_layer(0, 0, 10, 10);
        let out = layer.transform(100, 100, Rotation::Cw90, 100, 100);
        assert_eq!(out.text, "test");
        assert_eq!(out.zones[0].text, "test");
    }

    #[test]
    fn test_single_word_zone() {
        // Build a minimal text layer with one Page zone containing "Hi"
        let text = b"Hi";
        let mut data = Vec::new();
        // text_len = 2 (u24be)
        data.extend_from_slice(&[0x00, 0x00, 0x02]);
        data.extend_from_slice(text);
        data.push(0x00); // version

        // Page zone (type=1)
        data.push(0x01);
        // x=0, y=0, w=100, h=50 (biased i16: value + 0x8000)
        data.extend_from_slice(&biased(0)); // x=0
        data.extend_from_slice(&biased(0)); // y=0
        data.extend_from_slice(&biased(100)); // w=100
        data.extend_from_slice(&biased(50)); // h=50
        data.extend_from_slice(&biased(0)); // text_start=0
        // text_len = 2 (i24)
        data.extend_from_slice(&[0x00, 0x00, 0x02]);
        // children_count = 0 (i24)
        data.extend_from_slice(&[0x00, 0x00, 0x00]);

        let result = parse_text_layer(&data, 100).unwrap();
        assert_eq!(result.text, "Hi");
        assert_eq!(result.zones.len(), 1);
        assert_eq!(result.zones[0].kind, TextZoneKind::Page);
        assert_eq!(result.zones[0].text, "Hi");
        assert!(result.zones[0].children.is_empty());
        // Bottom-left y=0 with h=50 on a 100-px page → top-left y = 50.
        assert_eq!(
            result.zones[0].rect,
            Rect {
                x: 0,
                y: 50,
                width: 100,
                height: 50
            }
        );
    }
}