Skip to main content

pdf_annot/
annotation.rs

1//! Core annotation wrapper providing access to common annotation properties.
2
3extern crate alloc;
4
5use crate::appearance::AppearanceDict;
6use crate::types::*;
7use pdf_syntax::object::dict::keys::*;
8use pdf_syntax::object::{Array, Dict, Name, Rect, Stream};
9use pdf_syntax::page::Page;
10
11/// A PDF annotation, wrapping the raw dictionary.
12#[derive(Debug, Clone)]
13pub struct Annotation<'a> {
14    dict: Dict<'a>,
15}
16
17impl<'a> Annotation<'a> {
18    /// Parse all annotations from a page.
19    pub fn from_page(page: &Page<'a>) -> Vec<Self> {
20        page.annots()
21            .into_iter()
22            .map(|dict| Self { dict })
23            .collect()
24    }
25
26    /// Wrap an existing annotation dictionary.
27    pub fn from_dict(dict: Dict<'a>) -> Self {
28        Self { dict }
29    }
30
31    /// Return the raw annotation dictionary.
32    pub fn dict(&self) -> &Dict<'a> {
33        &self.dict
34    }
35
36    /// Return the annotation subtype.
37    pub fn annotation_type(&self) -> AnnotationType {
38        self.dict
39            .get::<Name>(SUBTYPE)
40            .map(|n| AnnotationType::from_name(n.as_ref()))
41            .unwrap_or(AnnotationType::Unknown)
42    }
43
44    /// Return the annotation rectangle.
45    pub fn rect(&self) -> Option<Rect> {
46        self.dict.get::<Rect>(RECT)
47    }
48
49    /// Return the text contents of the annotation.
50    pub fn contents(&self) -> Option<alloc::string::String> {
51        self.dict
52            .get::<pdf_syntax::object::String>(CONTENTS)
53            .map(|s| pdf_string_to_string(&s))
54    }
55
56    /// Return the annotation flags.
57    pub fn flags(&self) -> AnnotationFlags {
58        AnnotationFlags(self.dict.get::<u32>(F).unwrap_or(0))
59    }
60
61    /// Whether the annotation is hidden.
62    pub fn is_hidden(&self) -> bool {
63        self.flags().hidden()
64    }
65
66    /// Whether the annotation should be printed.
67    pub fn is_printable(&self) -> bool {
68        self.flags().print()
69    }
70
71    /// Return the unique annotation name (`/NM`).
72    pub fn name(&self) -> Option<alloc::string::String> {
73        self.dict
74            .get::<pdf_syntax::object::String>(NM)
75            .map(|s| pdf_string_to_string(&s))
76    }
77
78    /// Return the modification date (`/M`).
79    pub fn modified(&self) -> Option<alloc::string::String> {
80        self.dict
81            .get::<pdf_syntax::object::String>(M)
82            .map(|s| pdf_string_to_string(&s))
83    }
84
85    /// Return the author (`/T`).
86    pub fn author(&self) -> Option<alloc::string::String> {
87        self.dict
88            .get::<pdf_syntax::object::String>(T)
89            .map(|s| pdf_string_to_string(&s))
90    }
91
92    /// Return the subject (`/Subj`).
93    pub fn subject(&self) -> Option<alloc::string::String> {
94        self.dict
95            .get::<pdf_syntax::object::String>(SUBJ)
96            .map(|s| pdf_string_to_string(&s))
97    }
98
99    /// Return the annotation color (`/C`).
100    pub fn color(&self) -> Option<Color> {
101        self.dict
102            .get::<Array<'_>>(C)
103            .map(|arr| Color::from_array(&arr))
104    }
105
106    /// Return the border style (`/BS`).
107    pub fn border_style(&self) -> Option<BorderStyle> {
108        self.dict
109            .get::<Dict<'_>>(BS)
110            .map(|d| BorderStyle::from_dict(&d))
111    }
112
113    /// Return the legacy border array (`/Border`).
114    pub fn border_array(&self) -> Option<[f32; 3]> {
115        let arr = self.dict.get::<Array<'_>>(BORDER)?;
116        let mut iter = arr.iter::<f32>();
117        let h_radius = iter.next()?;
118        let v_radius = iter.next()?;
119        let width = iter.next()?;
120        Some([h_radius, v_radius, width])
121    }
122
123    /// Return the border effect (`/BE`).
124    pub fn border_effect(&self) -> Option<BorderEffect> {
125        self.dict
126            .get::<Dict<'_>>(BE)
127            .map(|d| BorderEffect::from_dict(&d))
128    }
129
130    /// Return the interior color (`/IC`).
131    pub fn interior_color(&self) -> Option<Color> {
132        self.dict
133            .get::<Array<'_>>(IC)
134            .map(|arr| Color::from_array(&arr))
135    }
136
137    /// Return the appearance dictionary (`/AP`).
138    pub fn appearance(&self) -> Option<AppearanceDict<'a>> {
139        AppearanceDict::from_annot(&self.dict)
140    }
141
142    /// Return the normal appearance stream directly.
143    pub fn normal_appearance(&self) -> Option<Stream<'a>> {
144        self.appearance()?.normal(&self.dict)
145    }
146
147    /// Return the creation date (`/CreationDate`).
148    pub fn creation_date(&self) -> Option<alloc::string::String> {
149        self.dict
150            .get::<pdf_syntax::object::String>(CREATION_DATE)
151            .map(|s| pdf_string_to_string(&s))
152    }
153
154    /// Return the opacity (`/CA`).
155    pub fn opacity(&self) -> Option<f32> {
156        self.dict.get::<f32>(CA)
157    }
158
159    /// Return the in-reply-to annotation dictionary (`/IRT`).
160    pub fn irt(&self) -> Option<Annotation<'a>> {
161        self.dict
162            .get::<Dict<'_>>(IRT)
163            .map(|d| Annotation { dict: d })
164    }
165
166    /// Return the reply type (`/RT`).
167    pub fn reply_type(&self) -> Option<alloc::string::String> {
168        self.dict
169            .get::<Name>(RT)
170            .map(|n| alloc::string::String::from(n.as_str()))
171    }
172
173    /// Return the annotation state (`/State`).
174    pub fn state(&self) -> Option<alloc::string::String> {
175        self.dict
176            .get::<Name>(STATE)
177            .map(|n| alloc::string::String::from(n.as_str()))
178    }
179
180    /// Return the state model (`/StateModel`).
181    pub fn state_model(&self) -> Option<alloc::string::String> {
182        self.dict
183            .get::<Name>(STATE_MODEL)
184            .map(|n| alloc::string::String::from(n.as_str()))
185    }
186
187    /// Return the popup annotation dictionary (`/Popup`).
188    pub fn popup(&self) -> Option<Annotation<'a>> {
189        self.dict
190            .get::<Dict<'_>>(POPUP)
191            .map(|d| Annotation { dict: d })
192    }
193
194    /// Return quad points (`/QuadPoints`).
195    pub fn quad_points(&self) -> Option<QuadPoints> {
196        self.dict
197            .get::<Array<'_>>(QUADPOINTS)
198            .map(|arr| QuadPoints::from_array(&arr))
199    }
200}
201
202/// Convert a PDF string (possibly UTF-16BE with BOM) to a Rust `String`.
203pub fn pdf_string_to_string(s: &pdf_syntax::object::String) -> alloc::string::String {
204    let bytes = s.as_bytes();
205    if bytes.len() >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF {
206        let utf16: Vec<u16> = bytes[2..]
207            .chunks_exact(2)
208            .map(|c| u16::from_be_bytes([c[0], c[1]]))
209            .collect();
210        alloc::string::String::from_utf16_lossy(&utf16)
211    } else if bytes.len() >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF {
212        // UTF-8 BOM
213        alloc::string::String::from_utf8_lossy(&bytes[3..]).into_owned()
214    } else {
215        // PDFDocEncoding: 0x00–0x7F are ASCII, 0x80–0xFF mapped per ISO 32000-2 D.2.
216        let mut s = alloc::string::String::with_capacity(bytes.len());
217        for &b in bytes {
218            s.push(pdfdoc_byte_to_char(b));
219        }
220        s
221    }
222}
223
/// Map a single PDFDocEncoding byte to a Unicode char.
///
/// PDFDocEncoding (ISO 32000-2 Annex D, Table D.2) differs from Latin-1 in
/// two ranges, which are handled with lookup tables:
///
/// - 0x18–0x1F are spacing diacritics (breve, caron, circumflex, dot above,
///   double acute, ogonek, ring, small tilde), not C0 control characters.
/// - 0x80–0xAD hold typographic punctuation, ligatures and accented letters;
///   from 0xA1 onward the table entries coincide with Latin-1 again.
///
/// Every other byte maps to the code point with the same numeric value.
/// Of the bytes the encoding leaves undefined, 0x9F yields U+FFFD, while
/// 0x7F and 0xAD are passed through unchanged.
fn pdfdoc_byte_to_char(b: u8) -> char {
    // 0x18–0x1F: the accent block that replaces the corresponding C0 controls.
    #[rustfmt::skip]
    static ACCENTS: [char; 8] = [
        '\u{02D8}', '\u{02C7}', '\u{02C6}', '\u{02D9}', // 18–1B
        '\u{02DD}', '\u{02DB}', '\u{02DA}', '\u{02DC}', // 1C–1F
    ];
    #[rustfmt::skip]
    static HIGH: [char; 46] = [
        '\u{2022}', '\u{2020}', '\u{2021}', '\u{2026}', // 80–83
        '\u{2014}', '\u{2013}', '\u{0192}', '\u{2044}', // 84–87
        '\u{2039}', '\u{203A}', '\u{2212}', '\u{2030}', // 88–8B
        '\u{201E}', '\u{201C}', '\u{201D}', '\u{2018}', // 8C–8F
        '\u{2019}', '\u{201A}', '\u{2122}', '\u{FB01}', // 90–93
        '\u{FB02}', '\u{0141}', '\u{0152}', '\u{0160}', // 94–97
        '\u{0178}', '\u{017D}', '\u{0131}', '\u{0142}', // 98–9B
        '\u{0153}', '\u{0161}', '\u{017E}', '\u{FFFD}', // 9C–9F
        '\u{20AC}', '\u{00A1}', '\u{00A2}', '\u{00A3}', // A0–A3
        '\u{00A4}', '\u{00A5}', '\u{00A6}', '\u{00A7}', // A4–A7
        '\u{00A8}', '\u{00A9}', '\u{00AA}', '\u{00AB}', // A8–AB
        '\u{00AC}', '\u{00AD}',                          // AC–AD
    ];
    match b {
        0x18..=0x1F => ACCENTS[usize::from(b - 0x18)],
        0x80..=0xAD => HIGH[usize::from(b - 0x80)],
        // 0x00–0x17, 0x20–0x7F and 0xAE–0xFF share their value with Unicode.
        _ => char::from(b),
    }
}