Skip to main content

djvu_rs/
annotation.rs

1//! DjVu annotation parser — phase 4.
2//!
3//! Parses ANTa (plain) and ANTz (BZZ-compressed) annotation chunks into
4//! typed structures.
5//!
6//! ## Key public types
7//!
8//! - `Annotation` — page-level annotation (background, zoom, mode)
9//! - `MapArea` — a clickable area with URL, description, and shape
10//! - `Shape` — rect / oval / poly / line / text area shape
11//! - `Color` — RGB color parsed from `#rrggbb` strings
12//! - `AnnotationError` — typed errors from this module
13//!
14//! ## Format notes
15//!
16//! ANTa/ANTz contain S-expression-like text:
17//! ```text
18//! (background #ffffff)
19//! (zoom 100)
20//! (mode color)
21//! (maparea "url" "desc" (rect x y w h) ...)
22//! ```
23//!
24//! This parser handles only the subset documented in the DjVu v3 spec
25//! (background, zoom, mode, maparea with rect/oval/poly/line/text shapes).
26
27#[cfg(not(feature = "std"))]
28use alloc::{
29    format,
30    string::{String, ToString},
31    vec::Vec,
32};
33
34use crate::{bzz_new::bzz_decode, error::BzzError};
35
36// ---- Error ------------------------------------------------------------------
37
/// Errors from annotation parsing.
///
/// Every fallible function in this module reports failures through this enum.
#[derive(Debug, thiserror::Error)]
pub enum AnnotationError {
    /// BZZ decompression failed.
    ///
    /// Produced when `bzz_decode` rejects an ANTz payload.
    #[error("bzz decode failed: {0}")]
    Bzz(#[from] BzzError),

    /// A hex color string is malformed.
    ///
    /// Carries the offending text exactly as it appeared in the annotation.
    #[error("invalid color value: {0}")]
    InvalidColor(String),

    /// A numeric value could not be parsed.
    ///
    /// Carries the text that failed to parse as an unsigned 32-bit integer.
    #[error("invalid number: {0}")]
    InvalidNumber(String),

    /// The S-expression does not have the expected structure.
    ///
    /// In this module it is raised for a shape list with no kind, an unknown
    /// shape kind, or a missing/non-atom numeric argument. Unbalanced
    /// parentheses are tolerated by the tree builder and do not produce it.
    #[error("malformed s-expression: {0}")]
    Parse(String),
}
57
58// ---- Public types -----------------------------------------------------------
59
/// An RGB color value.
///
/// Built by `parse_color` from a six-digit hex string such as `#rrggbb`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Color {
    /// Red channel (first hex pair).
    pub r: u8,
    /// Green channel (second hex pair).
    pub g: u8,
    /// Blue channel (third hex pair).
    pub b: u8,
}
68
/// Bounding rectangle in DjVu coordinates.
///
/// Note: coordinates are in DjVu native space (bottom-left origin).
/// Integration with the text layer coordinate system requires manual remap.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Rect {
    /// First numeric argument of the shape form.
    pub x: u32,
    /// Second numeric argument of the shape form.
    pub y: u32,
    /// Third numeric argument of the shape form.
    pub width: u32,
    /// Fourth numeric argument of the shape form.
    pub height: u32,
}
81
/// Shape of a maparea.
///
/// Each variant corresponds to one shape keyword accepted by `parse_shape`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Shape {
    /// `(rect x y w h)`.
    Rect(Rect),
    /// `(oval x y w h)`; stored as the four parsed arguments.
    Oval(Rect),
    /// `(poly x1 y1 x2 y2 ...)`; one `(x, y)` tuple per coordinate pair.
    Poly(Vec<(u32, u32)>),
    /// `(line x1 y1 x2 y2)`; fields are stored in that order.
    Line(u32, u32, u32, u32),
    /// `(text x y w h)`.
    Text(Rect),
}
92
/// A border style (currently stored as a raw string for forward-compat).
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Border {
    /// The first argument of a `(border ...)` option, copied verbatim.
    pub style: String,
}
99
/// A highlight color for a maparea.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Highlight {
    /// Color parsed from the first argument of a `(hilite ...)` option.
    pub color: Color,
}
106
107/// A clickable map area (hyperlink or highlight region) in a DjVu page.
108#[derive(Debug, Clone)]
109#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
110pub struct MapArea {
111    /// Target URL (empty string if no link).
112    pub url: String,
113    /// Human-readable description.
114    pub description: String,
115    /// Shape of the area.
116    pub shape: Shape,
117    /// Optional border style.
118    pub border: Option<Border>,
119    /// Optional highlight color.
120    pub highlight: Option<Highlight>,
121}
122
123/// Page-level annotation data.
124#[derive(Debug, Clone, Default)]
125#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
126pub struct Annotation {
127    /// Background color for the page view.
128    pub background: Option<Color>,
129    /// Zoom level (percentage, e.g. 100 = 100%).
130    pub zoom: Option<u32>,
131    /// Display mode string (e.g. "color", "bw", "fore", "back").
132    pub mode: Option<String>,
133}
134
135// ---- Entry points -----------------------------------------------------------
136
137/// Parse an ANTa (plain-text) annotation chunk.
138pub fn parse_annotations(data: &[u8]) -> Result<(Annotation, Vec<MapArea>), AnnotationError> {
139    let text = core::str::from_utf8(data).unwrap_or("");
140    parse_annotation_text(text)
141}
142
143/// Parse an ANTz (BZZ-compressed) annotation chunk.
144pub fn parse_annotations_bzz(data: &[u8]) -> Result<(Annotation, Vec<MapArea>), AnnotationError> {
145    let decoded = bzz_decode(data)?;
146    let text = core::str::from_utf8(&decoded).unwrap_or("");
147    parse_annotation_text(text)
148}
149
150// ---- S-expression tokenizer -------------------------------------------------
151
/// Minimal S-expression token.
#[derive(Debug, PartialEq)]
enum Token<'a> {
    /// Opening parenthesis `(`.
    LParen,
    /// Closing parenthesis `)`.
    RParen,
    /// Bare (unquoted) word, borrowed from the input.
    Atom(&'a str),
    /// Double-quoted string with backslash escapes already resolved.
    Quoted(String),
}

/// Tokenize an S-expression string into a flat Vec of tokens.
///
/// Recognised syntax:
/// - `(` and `)` become [`Token::LParen`] / [`Token::RParen`];
/// - `"..."` becomes [`Token::Quoted`]; a backslash makes the following
///   character literal, and an unterminated string runs to the end of input;
/// - space, tab, CR and LF separate tokens;
/// - `;` at a token start begins a comment that runs to the end of the line;
/// - anything else is an [`Token::Atom`] that extends to the next delimiter.
///
/// Quoted text is copied as UTF-8 slices of `input`, so multi-byte characters
/// are preserved rather than being split into one `char` per byte.
fn tokenize(input: &str) -> Vec<Token<'_>> {
    let mut tokens = Vec::new();
    let bytes = input.as_bytes();
    let mut i = 0;

    while let Some(&byte) = bytes.get(i) {
        match byte {
            b'(' => {
                tokens.push(Token::LParen);
                i += 1;
            }
            b')' => {
                tokens.push(Token::RParen);
                i += 1;
            }
            b'"' => {
                let (text, next) = read_quoted(input, i + 1);
                tokens.push(Token::Quoted(text));
                i = next;
            }
            b' ' | b'\t' | b'\n' | b'\r' => i += 1,
            b';' => {
                // Line comment: stop at (but do not consume) the newline.
                while bytes.get(i).is_some_and(|&b| b != b'\n') {
                    i += 1;
                }
            }
            _ => {
                let start = i;
                while bytes.get(i).is_some_and(|&b| !is_atom_delimiter(b)) {
                    i += 1;
                }
                // All delimiters are ASCII, so `start..i` always lies on char
                // boundaries; `get` is kept purely as a non-panicking guard.
                if let Some(atom) = input.get(start..i).filter(|a| !a.is_empty()) {
                    tokens.push(Token::Atom(atom));
                }
            }
        }
    }

    tokens
}

/// Returns `true` for bytes that terminate a bare atom.
fn is_atom_delimiter(byte: u8) -> bool {
    matches!(byte, b'(' | b')' | b'"' | b' ' | b'\t' | b'\n' | b'\r')
}

/// Reads a quoted string body starting at `i`, just past the opening quote.
///
/// Returns the unescaped text together with the index of the first byte after
/// the closing quote (or the input length if the string is unterminated).
fn read_quoted(input: &str, mut i: usize) -> (String, usize) {
    let bytes = input.as_bytes();
    let mut text = String::new();
    // Start of the pending run of literal text not yet copied into `text`.
    // `"` and `\` are ASCII, so every run boundary is a char boundary.
    let mut run_start = i;

    loop {
        match bytes.get(i) {
            Some(b'\\') if i + 1 < bytes.len() => {
                text.push_str(input.get(run_start..i).unwrap_or(""));
                i += 1;
                // Take the whole escaped character, not just its first byte.
                if let Some(escaped) = input.get(i..).and_then(|rest| rest.chars().next()) {
                    text.push(escaped);
                    i += escaped.len_utf8();
                }
                run_start = i;
            }
            Some(b'"') => {
                text.push_str(input.get(run_start..i).unwrap_or(""));
                return (text, i + 1);
            }
            // Includes a lone trailing backslash, which is kept literally.
            Some(_) => i += 1,
            None => {
                text.push_str(input.get(run_start..i).unwrap_or(""));
                return (text, i);
            }
        }
    }
}
233
234// ---- S-expression tree ------------------------------------------------------
235
/// A node in the parsed S-expression tree.
#[derive(Debug)]
enum SExpr {
    /// Leaf value. Both bare atoms and quoted strings end up here, so the
    /// distinction between the two token kinds is not kept in the tree.
    Atom(String),
    /// Parenthesised sequence of child expressions, in source order.
    List(Vec<SExpr>),
}
242
243/// Parse tokens into a list of top-level S-expressions.
244fn parse_sexprs(tokens: &[Token<'_>]) -> Vec<SExpr> {
245    let mut result = Vec::new();
246    let mut pos = 0usize;
247    while pos < tokens.len() {
248        if let Some(expr) = parse_one(tokens, &mut pos) {
249            result.push(expr);
250        }
251    }
252    result
253}
254
255fn parse_one(tokens: &[Token<'_>], pos: &mut usize) -> Option<SExpr> {
256    match tokens.get(*pos) {
257        Some(Token::LParen) => {
258            *pos += 1;
259            let mut items = Vec::new();
260            loop {
261                match tokens.get(*pos) {
262                    Some(Token::RParen) => {
263                        *pos += 1;
264                        break;
265                    }
266                    None => break,
267                    _ => {
268                        if let Some(child) = parse_one(tokens, pos) {
269                            items.push(child);
270                        } else {
271                            break;
272                        }
273                    }
274                }
275            }
276            Some(SExpr::List(items))
277        }
278        Some(Token::RParen) => {
279            // Unexpected RParen — skip
280            *pos += 1;
281            None
282        }
283        Some(Token::Atom(s)) => {
284            let s = s.to_string();
285            *pos += 1;
286            Some(SExpr::Atom(s))
287        }
288        Some(Token::Quoted(s)) => {
289            let s = s.clone();
290            *pos += 1;
291            Some(SExpr::Atom(s))
292        }
293        None => None,
294    }
295}
296
297// ---- Annotation builder from S-expressions ----------------------------------
298
299fn parse_annotation_text(text: &str) -> Result<(Annotation, Vec<MapArea>), AnnotationError> {
300    if text.trim().is_empty() {
301        return Ok((Annotation::default(), Vec::new()));
302    }
303
304    let tokens = tokenize(text);
305    let exprs = parse_sexprs(&tokens);
306
307    let mut annotation = Annotation::default();
308    let mut mapareas = Vec::new();
309
310    for expr in &exprs {
311        if let SExpr::List(items) = expr {
312            let head = match items.first() {
313                Some(SExpr::Atom(s)) => s.as_str(),
314                _ => continue,
315            };
316
317            match head {
318                "background" => {
319                    if let Some(SExpr::Atom(color_str)) = items.get(1) {
320                        annotation.background = Some(parse_color(color_str)?);
321                    }
322                }
323                "zoom" => {
324                    if let Some(SExpr::Atom(n)) = items.get(1) {
325                        annotation.zoom = Some(parse_uint(n)?);
326                    }
327                }
328                "mode" => {
329                    if let Some(SExpr::Atom(m)) = items.get(1) {
330                        annotation.mode = Some(m.clone());
331                    }
332                }
333                "maparea" => {
334                    if let Some(ma) = parse_maparea(items)? {
335                        mapareas.push(ma);
336                    }
337                }
338                _ => {} // ignore unknown top-level forms
339            }
340        }
341    }
342
343    Ok((annotation, mapareas))
344}
345
346fn parse_maparea(items: &[SExpr]) -> Result<Option<MapArea>, AnnotationError> {
347    // (maparea "url" "desc" (shape ...) [options...])
348    let url = match items.get(1) {
349        Some(SExpr::Atom(s)) => s.clone(),
350        _ => String::new(),
351    };
352    let description = match items.get(2) {
353        Some(SExpr::Atom(s)) => s.clone(),
354        _ => String::new(),
355    };
356
357    let shape_expr = match items.get(3) {
358        Some(SExpr::List(l)) => l,
359        _ => return Ok(None),
360    };
361
362    let shape = parse_shape(shape_expr)?;
363
364    // Optional border / highlight (items[4..])
365    let mut border = None;
366    let mut highlight = None;
367    for item in items.get(4..).unwrap_or(&[]) {
368        if let SExpr::List(opts) = item {
369            match opts.first() {
370                Some(SExpr::Atom(s)) if s == "border" => {
371                    if let Some(SExpr::Atom(style)) = opts.get(1) {
372                        border = Some(Border {
373                            style: style.clone(),
374                        });
375                    }
376                }
377                Some(SExpr::Atom(s)) if s == "hilite" => {
378                    if let Some(SExpr::Atom(color)) = opts.get(1) {
379                        highlight = Some(Highlight {
380                            color: parse_color(color)?,
381                        });
382                    }
383                }
384                _ => {}
385            }
386        }
387    }
388
389    Ok(Some(MapArea {
390        url,
391        description,
392        shape,
393        border,
394        highlight,
395    }))
396}
397
398fn parse_shape(items: &[SExpr]) -> Result<Shape, AnnotationError> {
399    let kind = match items.first() {
400        Some(SExpr::Atom(s)) => s.as_str(),
401        _ => return Err(AnnotationError::Parse("shape has no kind".to_string())),
402    };
403
404    match kind {
405        "rect" => {
406            let x = get_uint(items, 1)?;
407            let y = get_uint(items, 2)?;
408            let w = get_uint(items, 3)?;
409            let h = get_uint(items, 4)?;
410            Ok(Shape::Rect(Rect {
411                x,
412                y,
413                width: w,
414                height: h,
415            }))
416        }
417        "oval" => {
418            let x = get_uint(items, 1)?;
419            let y = get_uint(items, 2)?;
420            let w = get_uint(items, 3)?;
421            let h = get_uint(items, 4)?;
422            Ok(Shape::Oval(Rect {
423                x,
424                y,
425                width: w,
426                height: h,
427            }))
428        }
429        "text" => {
430            let x = get_uint(items, 1)?;
431            let y = get_uint(items, 2)?;
432            let w = get_uint(items, 3)?;
433            let h = get_uint(items, 4)?;
434            Ok(Shape::Text(Rect {
435                x,
436                y,
437                width: w,
438                height: h,
439            }))
440        }
441        "line" => {
442            let x1 = get_uint(items, 1)?;
443            let y1 = get_uint(items, 2)?;
444            let x2 = get_uint(items, 3)?;
445            let y2 = get_uint(items, 4)?;
446            Ok(Shape::Line(x1, y1, x2, y2))
447        }
448        "poly" => {
449            // (poly x1 y1 x2 y2 ...)
450            let mut pts = Vec::new();
451            let mut i = 1usize;
452            while i + 1 < items.len() {
453                let x = get_uint(items, i)?;
454                let y = get_uint(items, i + 1)?;
455                pts.push((x, y));
456                i += 2;
457            }
458            Ok(Shape::Poly(pts))
459        }
460        other => Err(AnnotationError::Parse(format!(
461            "unknown shape kind: {other}"
462        ))),
463    }
464}
465
466// ---- Helpers ----------------------------------------------------------------
467
468fn get_uint(items: &[SExpr], idx: usize) -> Result<u32, AnnotationError> {
469    match items.get(idx) {
470        Some(SExpr::Atom(s)) => parse_uint(s),
471        _ => Err(AnnotationError::Parse(format!(
472            "expected uint at position {idx}"
473        ))),
474    }
475}
476
477fn parse_uint(s: &str) -> Result<u32, AnnotationError> {
478    s.parse::<u32>()
479        .map_err(|_| AnnotationError::InvalidNumber(s.to_string()))
480}
481
482fn parse_color(s: &str) -> Result<Color, AnnotationError> {
483    let hex = s.strip_prefix('#').unwrap_or(s);
484    if hex.len() != 6 {
485        return Err(AnnotationError::InvalidColor(s.to_string()));
486    }
487    let r = u8::from_str_radix(&hex[0..2], 16)
488        .map_err(|_| AnnotationError::InvalidColor(s.to_string()))?;
489    let g = u8::from_str_radix(&hex[2..4], 16)
490        .map_err(|_| AnnotationError::InvalidColor(s.to_string()))?;
491    let b = u8::from_str_radix(&hex[4..6], 16)
492        .map_err(|_| AnnotationError::InvalidColor(s.to_string()))?;
493    Ok(Color { r, g, b })
494}
495
// Unit tests for the tokenizer, the scalar helpers, and the public
// plain-text entry point.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Tokenizer ───────────────────────────────────────────────────────────

    // A simple form yields paren / atom / atom / paren.
    #[test]
    fn test_tokenize_basic() {
        let tokens = tokenize("(background #ffffff)");
        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0], Token::LParen);
        assert!(matches!(&tokens[1], Token::Atom(s) if s == &"background"));
        assert!(matches!(&tokens[2], Token::Atom(s) if s == &"#ffffff"));
        assert_eq!(tokens[3], Token::RParen);
    }

    // Quoted text is emitted as a `Quoted` token without the quotes.
    #[test]
    fn test_tokenize_quoted_string() {
        let tokens = tokenize(r#"(maparea "http://example.com" "desc")"#);
        assert!(
            tokens
                .iter()
                .any(|t| matches!(t, Token::Quoted(s) if s == "http://example.com"))
        );
    }

    // A backslash-escaped quote stays inside the string and drops the backslash.
    #[test]
    fn test_tokenize_escape_in_quoted() {
        let tokens = tokenize(r#""hello\"world""#);
        assert_eq!(tokens.len(), 1);
        assert!(matches!(&tokens[0], Token::Quoted(s) if s == r#"hello"world"#));
    }

    // Text after `;` up to the newline produces no tokens.
    #[test]
    fn test_tokenize_line_comment() {
        let tokens = tokenize("; this is a comment\n(zoom 100)");
        // Comment should be skipped
        assert!(
            tokens
                .iter()
                .any(|t| matches!(t, Token::Atom(s) if s == &"zoom"))
        );
    }

    // Empty and whitespace-only input produce no tokens.
    #[test]
    fn test_tokenize_empty() {
        assert!(tokenize("").is_empty());
        assert!(tokenize("   \n\t  ").is_empty());
    }

    // ── Color parsing ───────────────────────────────────────────────────────

    // `#rrggbb` maps each hex pair to one channel.
    #[test]
    fn test_parse_color_valid() {
        let c = parse_color("#ff0080").unwrap();
        assert_eq!(
            c,
            Color {
                r: 255,
                g: 0,
                b: 128
            }
        );
    }

    // The leading `#` is optional.
    #[test]
    fn test_parse_color_no_hash() {
        let c = parse_color("00ff00").unwrap();
        assert_eq!(c, Color { r: 0, g: 255, b: 0 });
    }

    // Three-digit shorthand is not accepted.
    #[test]
    fn test_parse_color_invalid_length() {
        assert!(matches!(
            parse_color("#fff"),
            Err(AnnotationError::InvalidColor(_))
        ));
    }

    // Non-hex characters are rejected.
    #[test]
    fn test_parse_color_invalid_hex() {
        assert!(matches!(
            parse_color("#gggggg"),
            Err(AnnotationError::InvalidColor(_))
        ));
    }

    // ── Number parsing ──────────────────────────────────────────────────────

    // Plain decimal digits parse, including zero.
    #[test]
    fn test_parse_uint_valid() {
        assert_eq!(parse_uint("42").unwrap(), 42);
        assert_eq!(parse_uint("0").unwrap(), 0);
    }

    // Words and negative numbers are reported as `InvalidNumber`.
    #[test]
    fn test_parse_uint_invalid() {
        assert!(matches!(
            parse_uint("abc"),
            Err(AnnotationError::InvalidNumber(_))
        ));
        assert!(matches!(
            parse_uint("-5"),
            Err(AnnotationError::InvalidNumber(_))
        ));
    }

    // ── Full annotation parsing ─────────────────────────────────────────────

    // Empty input is valid and yields no annotation data.
    #[test]
    fn test_parse_empty() {
        let (ann, areas) = parse_annotations(b"").unwrap();
        assert!(ann.background.is_none());
        assert!(areas.is_empty());
    }

    // `(background ...)` fills the background color.
    #[test]
    fn test_parse_background() {
        let (ann, _) = parse_annotations(b"(background #ff0000)").unwrap();
        assert_eq!(ann.background, Some(Color { r: 255, g: 0, b: 0 }));
    }

    // Several top-level forms can follow each other without separators.
    #[test]
    fn test_parse_zoom_and_mode() {
        let (ann, _) = parse_annotations(b"(zoom 150)(mode color)").unwrap();
        assert_eq!(ann.zoom, Some(150));
        assert_eq!(ann.mode.as_deref(), Some("color"));
    }

    // A rect maparea carries its url, description and coordinates.
    #[test]
    fn test_parse_maparea_rect() {
        let input = br#"(maparea "http://example.com" "Example" (rect 10 20 100 50))"#;
        let (_, areas) = parse_annotations(input).unwrap();
        assert_eq!(areas.len(), 1);
        assert_eq!(areas[0].url, "http://example.com");
        assert_eq!(areas[0].description, "Example");
        assert!(matches!(&areas[0].shape, Shape::Rect(r) if r.x == 10 && r.y == 20));
    }

    // `oval` maps to `Shape::Oval`.
    #[test]
    fn test_parse_maparea_oval() {
        let input = br#"(maparea "" "" (oval 0 0 50 50))"#;
        let (_, areas) = parse_annotations(input).unwrap();
        assert!(matches!(&areas[0].shape, Shape::Oval(_)));
    }

    // `poly` groups its arguments into (x, y) pairs in order.
    #[test]
    fn test_parse_maparea_poly() {
        let input = br#"(maparea "" "" (poly 0 0 10 0 10 10 0 10))"#;
        let (_, areas) = parse_annotations(input).unwrap();
        if let Shape::Poly(pts) = &areas[0].shape {
            assert_eq!(pts.len(), 4);
            assert_eq!(pts[0], (0, 0));
            assert_eq!(pts[2], (10, 10));
        } else {
            panic!("expected poly shape");
        }
    }

    // `line` keeps its four arguments in source order.
    #[test]
    fn test_parse_maparea_line() {
        let input = br#"(maparea "" "" (line 0 0 100 100))"#;
        let (_, areas) = parse_annotations(input).unwrap();
        assert!(matches!(&areas[0].shape, Shape::Line(0, 0, 100, 100)));
    }

    // Options after the shape populate `border` and `highlight`.
    #[test]
    fn test_parse_maparea_with_border_and_hilite() {
        let input = br#"(maparea "" "" (rect 0 0 10 10) (border solid) (hilite #00ff00))"#;
        let (_, areas) = parse_annotations(input).unwrap();
        assert_eq!(areas[0].border.as_ref().unwrap().style, "solid");
        assert_eq!(
            areas[0].highlight.as_ref().unwrap().color,
            Color { r: 0, g: 255, b: 0 }
        );
    }

    // An unsupported shape keyword fails the parse with `Parse`.
    #[test]
    fn test_parse_unknown_shape() {
        let input = br#"(maparea "" "" (circle 0 0 10))"#;
        assert!(matches!(
            parse_annotations(input),
            Err(AnnotationError::Parse(_))
        ));
    }

    // Unknown top-level forms are skipped without affecting later ones.
    #[test]
    fn test_parse_unknown_toplevel_ignored() {
        let input = b"(unknown_key value)(zoom 100)";
        let (ann, _) = parse_annotations(input).unwrap();
        assert_eq!(ann.zoom, Some(100));
    }

    // Map areas are returned in source order.
    #[test]
    fn test_parse_multiple_mapareas() {
        let input = br#"(maparea "a" "" (rect 0 0 1 1))(maparea "b" "" (rect 2 2 3 3))"#;
        let (_, areas) = parse_annotations(input).unwrap();
        assert_eq!(areas.len(), 2);
        assert_eq!(areas[0].url, "a");
        assert_eq!(areas[1].url, "b");
    }
}