Skip to main content

pdfplumber_core/
annotation.rs

//! PDF annotation types.
//!
//! Provides [`Annotation`] and [`AnnotationType`] for representing PDF page
//! annotations such as text notes, links, highlights, and stamps.
5
6use crate::BBox;
7
/// The kind of a PDF annotation, derived from its `/Subtype` name.
///
/// Only the most commonly encountered subtypes from PDF 1.7 (Table 169) have
/// a dedicated variant. Every other subtype name is carried verbatim inside
/// [`AnnotationType::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum AnnotationType {
    /// Sticky-note style text annotation.
    Text,
    /// Link annotation: a hyperlink or an internal navigation target.
    Link,
    /// Free text annotation whose text is displayed directly.
    FreeText,
    /// Highlight markup.
    Highlight,
    /// Underline markup.
    Underline,
    /// Strikeout markup.
    StrikeOut,
    /// Stamp annotation.
    Stamp,
    /// Square annotation, i.e. a rectangle shape.
    Square,
    /// Circle annotation, i.e. an ellipse shape.
    Circle,
    /// Ink annotation (freehand drawing).
    Ink,
    /// Popup annotation, associated with another annotation.
    Popup,
    /// Widget annotation (form field).
    Widget,
    /// Any subtype without a dedicated variant; holds the subtype name.
    Other(String),
}

impl AnnotationType {
    /// Maps a PDF `/Subtype` name onto an [`AnnotationType`].
    ///
    /// The comparison is exact and case-sensitive, and a leading `/` is not
    /// stripped. A name that matches none of the known subtypes is returned
    /// unchanged inside [`AnnotationType::Other`], so this never fails.
    pub fn from_subtype(subtype: &str) -> Self {
        match subtype {
            // Notes, free-standing text, and navigation.
            "Text" => AnnotationType::Text,
            "FreeText" => AnnotationType::FreeText,
            "Link" => AnnotationType::Link,

            // Text markup.
            "Highlight" => AnnotationType::Highlight,
            "Underline" => AnnotationType::Underline,
            "StrikeOut" => AnnotationType::StrikeOut,

            // Shapes and drawings.
            "Square" => AnnotationType::Square,
            "Circle" => AnnotationType::Circle,
            "Ink" => AnnotationType::Ink,

            // Remaining dedicated subtypes.
            "Stamp" => AnnotationType::Stamp,
            "Popup" => AnnotationType::Popup,
            "Widget" => AnnotationType::Widget,

            // Anything else is kept by name rather than rejected.
            unrecognized => AnnotationType::Other(String::from(unrecognized)),
        }
    }
}
63
64/// A PDF annotation extracted from a page.
65///
66/// Represents a single annotation with its type, bounding box, and optional
67/// metadata fields (contents, author, modification date).
68#[derive(Debug, Clone, PartialEq)]
69#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
70pub struct Annotation {
71    /// The annotation type (parsed from /Subtype).
72    pub annot_type: AnnotationType,
73    /// Bounding box of the annotation on the page.
74    pub bbox: BBox,
75    /// Text contents of the annotation (/Contents entry).
76    pub contents: Option<String>,
77    /// Author of the annotation (/T entry).
78    pub author: Option<String>,
79    /// Modification date (/M entry, raw PDF date string).
80    pub date: Option<String>,
81    /// Raw /Subtype name as it appears in the PDF.
82    pub raw_subtype: String,
83}
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Every subtype name with a dedicated variant must map to that variant.
    ///
    /// The table lists all twelve known subtypes so that a missing or
    /// mistyped match arm in `from_subtype` is caught.
    #[test]
    fn annotation_type_from_known_subtypes() {
        let cases = [
            ("Text", AnnotationType::Text),
            ("Link", AnnotationType::Link),
            ("FreeText", AnnotationType::FreeText),
            ("Highlight", AnnotationType::Highlight),
            ("Underline", AnnotationType::Underline),
            ("StrikeOut", AnnotationType::StrikeOut),
            ("Stamp", AnnotationType::Stamp),
            ("Square", AnnotationType::Square),
            ("Circle", AnnotationType::Circle),
            ("Ink", AnnotationType::Ink),
            ("Popup", AnnotationType::Popup),
            ("Widget", AnnotationType::Widget),
        ];
        for (name, expected) in cases {
            assert_eq!(
                AnnotationType::from_subtype(name),
                expected,
                "subtype {:?}",
                name
            );
        }
    }

    /// Names without a dedicated variant are preserved inside `Other`.
    #[test]
    fn annotation_type_from_unknown_subtype() {
        assert_eq!(
            AnnotationType::from_subtype("Watermark"),
            AnnotationType::Other("Watermark".to_string())
        );
    }

    /// Matching is exact: wrong case and the empty name fall through to
    /// `Other` instead of being normalized to a known variant.
    #[test]
    fn annotation_type_matching_is_exact() {
        assert_eq!(
            AnnotationType::from_subtype("text"),
            AnnotationType::Other("text".to_string())
        );
        assert_eq!(
            AnnotationType::from_subtype("HIGHLIGHT"),
            AnnotationType::Other("HIGHLIGHT".to_string())
        );
        assert_eq!(
            AnnotationType::from_subtype(""),
            AnnotationType::Other(String::new())
        );
    }

    /// All fields, including the optional metadata, are stored as given.
    #[test]
    fn annotation_with_all_fields() {
        let annot = Annotation {
            annot_type: AnnotationType::Text,
            bbox: BBox::new(100.0, 200.0, 300.0, 250.0),
            contents: Some("A comment".to_string()),
            author: Some("Alice".to_string()),
            date: Some("D:20240101120000".to_string()),
            raw_subtype: "Text".to_string(),
        };
        assert_eq!(annot.annot_type, AnnotationType::Text);
        assert_eq!(annot.contents.as_deref(), Some("A comment"));
        assert_eq!(annot.author.as_deref(), Some("Alice"));
        assert_eq!(annot.date.as_deref(), Some("D:20240101120000"));
        assert_eq!(annot.raw_subtype, "Text");
    }

    /// An annotation can be built with every optional field absent.
    #[test]
    fn annotation_with_no_optional_fields() {
        let annot = Annotation {
            annot_type: AnnotationType::Link,
            bbox: BBox::new(0.0, 0.0, 100.0, 20.0),
            contents: None,
            author: None,
            date: None,
            raw_subtype: "Link".to_string(),
        };
        assert_eq!(annot.annot_type, AnnotationType::Link);
        assert!(annot.contents.is_none());
        assert!(annot.author.is_none());
        assert!(annot.date.is_none());
        assert_eq!(annot.raw_subtype, "Link");
    }

    /// A clone compares equal to its source, and changing a field of the
    /// clone makes the two compare unequal.
    #[test]
    fn annotation_clone_and_eq() {
        let annot1 = Annotation {
            annot_type: AnnotationType::Highlight,
            bbox: BBox::new(10.0, 20.0, 30.0, 40.0),
            contents: Some("highlighted".to_string()),
            author: None,
            date: None,
            raw_subtype: "Highlight".to_string(),
        };
        let annot2 = annot1.clone();
        assert_eq!(annot1, annot2);

        let mut annot3 = annot1.clone();
        annot3.contents = None;
        assert_ne!(annot1, annot3);
    }
}